cos.h source code [llvm_projects/libc/src/__support/math/cos.h]

//===-- Implementation header for cos ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_COS_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_COS_H
11
12	#include "range_reduction_double_common.h"
13	#include "sincos_eval.h"
14	#include "src/__support/FPUtil/FEnvImpl.h"
15	#include "src/__support/FPUtil/FPBits.h"
16	#include "src/__support/FPUtil/double_double.h"
17	#include "src/__support/FPUtil/dyadic_float.h"
18	#include "src/__support/FPUtil/except_value_utils.h"
19	#include "src/__support/macros/config.h"
20	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
21	#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
22
23	#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
24	#include "range_reduction_double_fma.h"
25	#else
26	#include "range_reduction_double_nofma.h"
27	#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
28
29	namespace LIBC_NAMESPACE_DECL {
30
31	namespace math {
32
33	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
34	LIBC_INLINE double
35	cos_accurate(double x, uint16_t x_e, unsigned k,
36	const range_reduction_double_internal::LargeRangeReduction
37	&range_reduction_large) {
38	using namespace math::range_reduction_double_internal;
39	using FPBits = typename fputil::FPBits<double>;
40
41	DFloat128 u_f128, sin_u, cos_u;
42	if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
43	u_f128 = range_reduction_small_f128(x);
44	else
45	u_f128 = range_reduction_large.accurate();
46
47	math::sincos_eval_internal::sincos_eval(u: u_f128, sin_u, cos_u);
48
49	auto get_sin_k = [](unsigned kk) -> DFloat128 {
50	unsigned idx = (kk & `64`) ? `64` - (kk & `63`) : (kk & `63`);
51	DFloat128 ans = SIN_K_PI_OVER_128_F128[idx];
52	if (kk & `128`)
53	ans.sign = Sign::NEG;
54	return ans;
55	};
56
57	// -sin(k pi/128) = sin((k + 128) * pi/128)*
58	// cos(k pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).*
59	DFloat128 msin_k_f128 = get_sin_k(k + `128`);
60	DFloat128 cos_k_f128 = get_sin_k(k + `64`);
61
62	// cos(x) = cos((k pi/128 + u)*
63	// = cos(u) cos(kpi/128) - sin(u) sin(kpi/128)
64	DFloat128 r = fputil::quick_add(a: fputil::quick_mul(a: cos_k_f128, b: cos_u),
65	b: fputil::quick_mul(a: msin_k_f128, b: sin_u));
66
67	// TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
68	// https://github.com/llvm/llvm-project/issues/96452.
69
70	return static_cast<double>(r);
71	}
72	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
73
74	LIBC_INLINE double cos(double x) {
75	using namespace range_reduction_double_internal;
76	using FPBits = typename fputil::FPBits<double>;
77	FPBits xbits(x);
78
79	uint16_t x_e = xbits.get_biased_exponent();
80
81	DoubleDouble y;
82	unsigned k = `0`;
83	LargeRangeReduction range_reduction_large;
84
85	// \|x\| < 2^16.
86	if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) {
87	// \|x\| < 2^-4
88	if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - `4`)) {
89	// \|x\| < 2^-27
90	if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - `27`)) {
91	// Signed zeros.
92	if (LIBC_UNLIKELY(x == `0.0`))
93	return `1.0`;
94
95	// For \|x\| < 2^-27, \|cos(x) - 1\| < \|x\|^2/2 < 2^-54 = ulp(1 - 2^-53)/2.
96	return fputil::round_result_slightly_down(value_rn: `1.0`);
97	}
98	// No range reduction needed.
99
100	// Use degree-8 polynomial approximation:
101	// cos(x) ~ 1 + a1 x^2 + a2 * x^4 + a3 * x^6 + a4 * x^8*
102	// ~ 1 + x^2 Q(x^2).*
103	// > P = fpminimax(cos(x), [\|0, 2, 4, 6, 8\|], [\|1, D...\|], [0, 2^-4]);
104	// > dirtyinfnorm(cos(x) - P, [-2^-4, 2^-4]);
105	// 0x1.3cfe...p-70
106	// > P;
107	constexpr double COEFFS[] = {-`0x1p-1`, `0x1.5555555555262p-5`,
108	-`0x1.6c16c1508bff1p-10`,
109	`0x1.a00ffd769159ap-16`};
110	double x_sq = x * x;
111	double c0 = fputil::multiply_add(x: x_sq, y: COEFFS[`1`], z: COEFFS[`0`]);
112	double c1 = fputil::multiply_add(x: x_sq, y: COEFFS[`3`], z: COEFFS[`2`]);
113	double x4 = x_sq * x_sq;
114	double r_lo = fputil::multiply_add(x: x4, y: c1, z: c0) * x_sq;
115
116	#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
117	return `1.0` + r_lo;
118	#else
119	// Overall errors <= ulp(x^2/2) + 2^-69.
120	double err = fputil::multiply_add(x: x_sq, y: `0x1.0p-53`, z: `0x1.0p-69`);
121	double r_lo_u = r_lo + err;
122	double r_lo_l = r_lo - err;
123	double r_upper = `1.0` + r_lo_u;
124	double r_lower = `1.0` + r_lo_l;
125
126	if (LIBC_LIKELY(r_upper == r_lower))
127	return r_upper;
128
129	k = range_reduction_small(x, u&: y);
130	return cos_accurate(x, x_e, k, range_reduction_large);
131	#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
132	} else {
133	// Small range reduction.
134	k = range_reduction_small(x, u&: y);
135	}
136	} else {
137	// Inf or NaN
138	if (LIBC_UNLIKELY(x_e > `2` * FPBits::EXP_BIAS)) {
139	if (xbits.is_signaling_nan()) {
140	fputil::raise_except_if_required(FE_INVALID);
141	return FPBits::quiet_nan().get_val();
142	}
143	// cos(+-Inf) = NaN
144	if (xbits.get_mantissa() == `0`) {
145	fputil::set_errno_if_required(EDOM);
146	fputil::raise_except_if_required(FE_INVALID);
147	}
148	return x + FPBits::quiet_nan().get_val();
149	}
150
151	// Large range reduction.
152	k = range_reduction_large.fast(x, u&: y);
153	}
154
155	DoubleDouble sin_y, cos_y;
156
157	[[maybe_unused]] double err =
158	math::sincos_eval_internal::sincos_eval(u: y, sin_u&: sin_y, cos_u&: cos_y);
159
160	// Look up sin(k pi/128) and cos(k * pi/128)*
161	#ifdef LIBC_MATH_HAS_SMALL_TABLES
162	// Memory saving versions. Use 65-entry table.
163	auto get_idx_dd = [](unsigned kk) -> DoubleDouble {
164	unsigned idx = (kk & `64`) ? `64` - (kk & `63`) : (kk & `63`);
165	DoubleDouble ans = SIN_K_PI_OVER_128[idx];
166	if (kk & `128`) {
167	ans.hi = -ans.hi;
168	ans.lo = -ans.lo;
169	}
170	return ans;
171	};
172	DoubleDouble msin_k = get_idx_dd(k + `128`);
173	DoubleDouble cos_k = get_idx_dd(k + `64`);
174	#else
175	// Fast look up version, but needs 256-entry table.
176	// -sin(k pi/128) = sin((k + 128) * pi/128)*
177	// cos(k pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).*
178	DoubleDouble msin_k = SIN_K_PI_OVER_128[(k + `128`) & `255`];
179	DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + `64`) & `255`];
180	#endif // LIBC_MATH_HAS_SMALL_TABLES
181
182	// After range reduction, k = round(x 128 / pi) and y = x - k * (pi / 128).*
183	// So k is an integer and -pi / 256 <= y <= pi / 256.
184	// Then cos(x) = cos((k pi/128 + y)*
185	// = cos(y) cos(kpi/128) - sin(y) sin(kpi/128)
186	DoubleDouble cos_k_cos_y = fputil::quick_mult(a: cos_y, b: cos_k);
187	DoubleDouble msin_k_sin_y = fputil::quick_mult(a: sin_y, b: msin_k);
188	// When k != 64 mod 128,
189	// \|cos( k pi/128 )\| > pi/128 - epsilon > \|y\| >= \|sin(y)\|,*
190	// and cos(y) > 1 - pi/128. So we can use Fast2Sum for the subtraction:
191	// cos(y) cos(kpi/128) - sin(y) sin(kpi/128).
192	DoubleDouble rr = fputil::exact_add(a: cos_k_cos_y.hi, b: msin_k_sin_y.hi);
193	rr.lo += msin_k_sin_y.lo + cos_k_cos_y.lo;
194
195	#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
196	return rr.hi + rr.lo;
197	#else
198	double rlp = rr.lo + err;
199	double rlm = rr.lo - err;
200
201	double r_upper = rr.hi + rlp; // (rr.lo + ERR);
202	double r_lower = rr.hi + rlm; // (rr.lo - ERR);
203
204	// Ziv's rounding test.
205	if (LIBC_LIKELY(r_upper == r_lower))
206	return r_upper;
207
208	return cos_accurate(x, x_e, k, range_reduction_large);
209	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
210	}
211
212	} // namespace math
213
214	} // namespace LIBC_NAMESPACE_DECL
215
216	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_COS_H
217

Browse the source code of llvm_projects/libc/src/__support/math/cos.h