atan2f128.h source code [llvm_projects/libc/src/__support/math/atan2f128.h]

1	//===-- Implementation header for atan2f128 ---------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
11
12	#include "include/llvm-libc-types/float128.h"
13
14	#ifdef LIBC_TYPES_HAS_FLOAT128
15
16	#include "atan_utils.h"
17	#include "src/__support/FPUtil/FPBits.h"
18	#include "src/__support/FPUtil/dyadic_float.h"
19	#include "src/__support/FPUtil/nearest_integer.h"
20	#include "src/__support/integer_literals.h"
21	#include "src/__support/macros/config.h"
22	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
23	#include "src/__support/uint128.h"
24
25	namespace LIBC_NAMESPACE_DECL {
26
27	namespace math {
28
29	// There are several range reduction steps we can take for atan2(y, x) as
30	// follows:
31
32	// * Range reduction 1: signedness
33	// atan2(y, x) will return a number between -PI and PI representing the angle
34	// formed by the Ox axis and the vector (x, y) on the Oxy-plane.
35	// In particular, we have that:
36	//   atan2(y, x) = atan( y/x )         if x >= 0 and y >= 0 (I-quadrant)
37	//               = pi + atan( y/x )    if x < 0 and y >= 0  (II-quadrant)
38	//               = -pi + atan( y/x )   if x < 0 and y < 0   (III-quadrant)
39	//               = atan( y/x )         if x >= 0 and y < 0  (IV-quadrant)
40	// Since the atan function is odd, we can use the formula:
41	//   atan(-u) = -atan(u)
42	// to adjust the above conditions a bit further:
43	//   atan2(y, x) = atan( |y|/|x| )         if x >= 0 and y >= 0 (I-quadrant)
44	//               = pi - atan( |y|/|x| )    if x < 0 and y >= 0  (II-quadrant)
45	//               = -pi + atan( |y|/|x| )   if x < 0 and y < 0   (III-quadrant)
46	//               = -atan( |y|/|x| )        if x >= 0 and y < 0  (IV-quadrant)
47	// Which can be simplified to:
48	//   atan2(y, x) = sign(y) * atan( |y|/|x| )             if x >= 0
49	//               = sign(y) * (pi - atan( |y|/|x| ))      if x < 0
50
51	// * Range reduction 2: reciprocal
52	// Now that the argument inside atan is positive, we can use the formula:
53	//   atan(1/x) = pi/2 - atan(x)
54	// to make the argument inside atan <= 1 as follows:
55	//   atan2(y, x) = sign(y) * atan( |y|/|x| )            if 0 <= |y| <= x
56	//               = sign(y) * (pi/2 - atan( |x|/|y| ))   if 0 <= x < |y|
57	//               = sign(y) * (pi - atan( |y|/|x| ))     if 0 <= |y| <= -x
58	//               = sign(y) * (pi/2 + atan( |x|/|y| ))   if 0 <= -x < |y|
59
60	// * Range reduction 3: look up table.
61	// After the previous two range reduction steps, we reduce the problem to
62	// computing atan(u) with 0 <= u <= 1, or to be precise:
63	//   atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|).
64	// An accurate polynomial approximation for the whole [0, 1] input range will
65	// require a very large degree. To make it more efficient, we reduce the input
66	// range further by finding an integer idx such that:
67	//   | n/d - idx/64 | <= 1/128.
68	// In particular,
69	//   idx := round(2^6 * n/d)
70	// Then for the fast pass, we find a polynomial approximation for:
71	//   atan( n/d ) ~ atan( idx/64 ) + (n/d - idx/64) * Q(n/d - idx/64)
72	// For the accurate pass, we use the addition formula:
73	//   atan( n/d ) - atan( idx/64 ) = atan( (n/d - idx/64)/(1 + (n*idx)/(64*d)) )
74	//                                = atan( (n - d*(idx/64))/(d + n*(idx/64)) )
75	// And for the fast pass, we use a degree-13 minimax polynomial to compute the
76	// RHS:
77	//   atan(u) ~ P(u) = u - c_3 * u^3 + c_5 * u^5 - c_7 * u^7 + c_9 * u^9
78	//                    - c_11 * u^11 + c_13 * u^13
79	// with absolute errors bounded by:
80	//   |atan(u) - P(u)| < 2^-121
81	// and relative errors bounded by:
82	//   |(atan(u) - P(u)) / P(u)| < 2^-114.
83
84	LIBC_INLINE float128 atan2f128(float128 y, float128 x) {
85	using DFloat128 = fputil::DyadicFloat<`128`>;
86
87	constexpr DFloat128 ZERO = {Sign::POS, `0`, `0_u128`};
88	constexpr DFloat128 MZERO = {Sign::NEG, `0`, `0_u128`};
89	constexpr DFloat128 PI = {Sign::POS, -`126`,
90	`0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
91	constexpr DFloat128 MPI = {Sign::NEG, -`126`,
92	`0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
93	constexpr DFloat128 PI_OVER_2 = {Sign::POS, -`127`,
94	`0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
95	constexpr DFloat128 MPI_OVER_2 = {Sign::NEG, -`127`,
96	`0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
97	constexpr DFloat128 PI_OVER_4 = {Sign::POS, -`128`,
98	`0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
99	constexpr DFloat128 THREE_PI_OVER_4 = {
100	Sign::POS, -`128`, `0x96cbe3f9'990e91a7'9394c9e8'a0a5159d_u128`};
101
102	// Adjustment for constant term:
103	// CONST_ADJ[x_sign][y_sign][recip]
104	constexpr DFloat128 CONST_ADJ[`2`][`2`][`2`] = {
105	{{ZERO, MPI_OVER_2}, {MZERO, MPI_OVER_2}},
106	{{MPI, PI_OVER_2}, {MPI, PI_OVER_2}}};
107
108	using namespace atan_internal;
109	using FPBits = fputil::FPBits<float128>;
110	using DFloat128 = fputil::DyadicFloat<`128`>;
111
112	FPBits x_bits(x), y_bits(y);
113	bool x_sign = x_bits.sign().is_neg();
114	bool y_sign = y_bits.sign().is_neg();
115	x_bits = x_bits.abs();
116	y_bits = y_bits.abs();
117	UInt128 x_abs = x_bits.uintval();
118	UInt128 y_abs = y_bits.uintval();
119	bool recip = x_abs < y_abs;
120	UInt128 min_abs = recip ? x_abs : y_abs;
121	UInt128 max_abs = !recip ? x_abs : y_abs;
122	unsigned min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN);
123	unsigned max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN);
124
125	DFloat128 num(FPBits (min_abs).get_val());
126	DFloat128 den(FPBits (max_abs).get_val());
127
128	// Check for exceptional cases, whether inputs are 0, inf, nan, or close to
129	// overflow, or close to underflow.
130	if (LIBC_UNLIKELY(max_exp >= `0x7fffU` \|\| min_exp == `0U`)) {
131	if (x_bits.is_nan() \|\| y_bits.is_nan())
132	return FPBits::quiet_nan().get_val();
133	unsigned x_except = x == `0` ? `0` : (FPBits (x_abs).is_inf() ? `2` : `1`);
134	unsigned y_except = y == `0` ? `0` : (FPBits (y_abs).is_inf() ? `2` : `1`);
135
136	// Exceptional cases:
137	// EXCEPT[y_except][x_except][x_is_neg]
138	// with x_except & y_except:
139	// 0: zero
140	// 1: finite, non-zero
141	// 2: infinity
142	constexpr DFloat128 EXCEPTS[`3`][`3`][`2`] = {
143	{{ZERO, PI}, {ZERO, PI}, {ZERO, PI}},
144	{{PI_OVER_2, PI_OVER_2}, {ZERO, ZERO}, {ZERO, PI}},
145	{{PI_OVER_2, PI_OVER_2},
146	{PI_OVER_2, PI_OVER_2},
147	{PI_OVER_4, THREE_PI_OVER_4}},
148	};
149
150	if ((x_except != `1`) \|\| (y_except != `1`)) {
151	DFloat128 r = EXCEPTS[y_except][x_except][x_sign];
152	if (y_sign)
153	r.sign = r.sign.negate();
154	return static_cast<float128>(r);
155	}
156	}
157
158	bool final_sign = ((x_sign != y_sign) != recip);
159	DFloat128 const_term = CONST_ADJ[x_sign][y_sign][recip];
160	int exp_diff = den.exponent - num.exponent;
161	// We have the following bound for normalized n and d:
162	// 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1).
163	if (LIBC_UNLIKELY(exp_diff > FPBits::FRACTION_LEN + `2`)) {
164	DFloat128 quotient = rounded_div(af: num, bf: den);
165	DFloat128 result = quick_add(a: const_term, b: quotient);
166	if (final_sign)
167	result.sign = result.sign.negate();
168	return static_cast<float128>(result);
169	}
170
171	// Take 24 leading bits of num and den to convert to float for fast division.
172	// We also multiply the numerator by 64 using integer addition directly to the
173	// exponent field.
174	float num_f =
175	cpp::bit_cast<float>(from: static_cast<uint32_t>(num.mantissa >> `104`) +
176	(`6U` << fputil::FPBits<float>::FRACTION_LEN));
177	float den_f = cpp::bit_cast<float>(
178	from: static_cast<uint32_t>(den.mantissa >> `104`) +
179	(static_cast<uint32_t>(exp_diff) << fputil::FPBits<float>::FRACTION_LEN));
180
181	float k = fputil::nearest_integer(x: num_f / den_f);
182	unsigned idx = static_cast<unsigned>(k);
183
184	// k_f128 = idx / 64
185	DFloat128 k_f128(Sign::POS, -`6`, DFloat128::MantissaType(idx));
186
187	// Range reduction:
188	// atan(n/d) - atan(k) = atan((n/d - k/64) / (1 + (n/d) (k/64)))*
189	// = atan((n - d k/64)) / (d + n * k/64))*
190	// num_f128 = n - d k/64*
191	DFloat128 num_f128 = fputil::multiply_add(a: den, b: -k_f128, c: num);
192	// den_f128 = d + n k/64*
193	DFloat128 den_f128 = fputil::multiply_add(a: num, b: k_f128, c: den);
194
195	// q = (n - d k) / (d + n * k)*
196	DFloat128 q =
197	fputil::quick_mul(a: num_f128, b: fputil::approx_reciprocal(a: den_f128));
198	// p ~ atan(q)
199	DFloat128 p = atan_eval(x: q);
200
201	DFloat128 r =
202	fputil::quick_add(a: const_term, b: fputil::quick_add(a: ATAN_I_F128[idx], b: p));
203	if (final_sign)
204	r.sign = r.sign.negate();
205
206	return static_cast<float128>(r);
207	}
208
209	} // namespace math
210
211	} // namespace LIBC_NAMESPACE_DECL
212
213	#endif // LIBC_TYPES_HAS_FLOAT128
214
215	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
216

Browse the source code of llvm_projects/libc/src/__support/math/atan2f128.h