sincosf.h source code [llvm_projects/libc/src/__support/math/sincosf.h]

1	//===-- Implementation header for sincosf ------------------------- C++--===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_H
11
12	#include "sincosf_utils.h"
13	#include "src/__support/FPUtil/FEnvImpl.h"
14	#include "src/__support/FPUtil/FPBits.h"
15	#include "src/__support/FPUtil/multiply_add.h"
16	#include "src/__support/FPUtil/rounding_mode.h"
17	#include "src/__support/common.h"
18	#include "src/__support/macros/config.h"
19	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
20	#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
21
22	namespace LIBC_NAMESPACE_DECL {
23
24	namespace math {
25
26	namespace sincosf_internal {
27
28	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
29	// Exceptional values
30	LIBC_INLINE_VAR constexpr int N_EXCEPTS = `6`;
31
32	LIBC_INLINE_VAR constexpr uint32_t EXCEPT_INPUTS[N_EXCEPTS] = {
33	`0x46199998`, // x = 0x1.33333p13 x
34	`0x55325019`, // x = 0x1.64a032p43 x
35	`0x5922aa80`, // x = 0x1.4555p51 x
36	`0x5f18b878`, // x = 0x1.3170fp63 x
37	`0x6115cb11`, // x = 0x1.2b9622p67 x
38	`0x7beef5ef`, // x = 0x1.ddebdep120 x
39	};
40
41	LIBC_INLINE_VAR constexpr uint32_t EXCEPT_OUTPUTS_SIN[N_EXCEPTS][`4`] = {
42	{`0xbeb1fa5d`, `0`, `1`, `0`}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ)
43	{`0xbf171adf`, `0`, `1`, `1`}, // x = 0x1.64a032p43, sin(x) = -0x1.2e35bep-1 (RZ)
44	{`0xbf587521`, `0`, `1`, `1`}, // x = 0x1.4555p51, sin(x) = -0x1.b0ea42p-1 (RZ)
45	{`0x3dad60f6`, `1`, `0`, `1`}, // x = 0x1.3170fp63, sin(x) = 0x1.5ac1ecp-4 (RZ)
46	{`0xbe7cc1e0`, `0`, `1`, `1`}, // x = 0x1.2b9622p67, sin(x) = -0x1.f983cp-3 (RZ)
47	{`0xbf587d1b`, `0`, `1`, `1`}, // x = 0x1.ddebdep120, sin(x) = -0x1.b0fa36p-1 (RZ)
48	};
49
50	LIBC_INLINE_VAR constexpr uint32_t EXCEPT_OUTPUTS_COS[N_EXCEPTS][`4`] = {
51	{`0xbf70090b`, `0`, `1`, `0`}, // x = 0x1.33333p13, cos(x) = -0x1.e01216p-1 (RZ)
52	{`0x3f4ea5d2`, `1`, `0`, `0`}, // x = 0x1.64a032p43, cos(x) = 0x1.9d4ba4p-1 (RZ)
53	{`0x3f08aebe`, `1`, `0`, `1`}, // x = 0x1.4555p51, cos(x) = 0x1.115d7cp-1 (RZ)
54	{`0x3f7f14bb`, `1`, `0`, `0`}, // x = 0x1.3170fp63, cos(x) = 0x1.fe2976p-1 (RZ)
55	{`0x3f78142e`, `1`, `0`, `1`}, // x = 0x1.2b9622p67, cos(x) = 0x1.f0285cp-1 (RZ)
56	{`0x3f08a21c`, `1`, `0`, `0`}, // x = 0x1.ddebdep120, cos(x) = 0x1.114438p-1 (RZ)
57	};
58	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
59	} // namespace sincosf_internal
60
61	LIBC_INLINE void sincosf(float x, float sinp, float* *cosp) {
62	using namespace sincosf_internal;
63	using namespace sincosf_utils_internal;
64	using FPBits = typename fputil::FPBits<float>;
65	FPBits xbits(x);
66
67	uint32_t x_abs = xbits.uintval() & `0x7fff'ffffU`;
68	double xd = static_cast<double>(x);
69
70	// Range reduction:
71	// For \|x\| >= 2^-12, we perform range reduction as follows:
72	// Find k and y such that:
73	// x = (k + y) pi/32*
74	// k is an integer
75	// \|y\| < 0.5
76	// For small range (\|x\| < 2^45 when FMA instructions are available, 2^22
77	// otherwise), this is done by performing:
78	// k = round(x 32/pi)*
79	// y = x 32/pi - k*
80	// For large range, we will omit all the higher parts of 32/pi such that the
81	// least significant bits of their full products with x are larger than 63,
82	// since:
83	// sin((k + y + 64i) * pi/32) = sin(x + i * 2pi) = sin(x), and*
84	// cos((k + y + 64i) * pi/32) = cos(x + i * 2pi) = cos(x).*
85	//
86	// When FMA instructions are not available, we store the digits of 32/pi in
87	// chunks of 28-bit precision. This will make sure that the products:
88	// x THIRTYTWO_OVER_PI_28[i] are all exact.*
89	// When FMA instructions are available, we simply store the digits of326/pi in
90	// chunks of doubles (53-bit of precision).
91	// So when multiplying by the largest values of single precision, the
92	// resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the
93	// worst-case analysis of range reduction, \|y\| >= 2^-38, so this should give
94	// us more than 40 bits of accuracy. For the worst-case estimation of range
95	// reduction, see for instances:
96	// Elementary Functions by J-M. Muller, Chapter 11,
97	// Handbook of Floating-Point Arithmetic by J-M. Muller et. al.,
98	// Chapter 10.2.
99	//
100	// Once k and y are computed, we then deduce the answer by the sine and cosine
101	// of sum formulas:
102	// sin(x) = sin((k + y)pi/32)*
103	// = sin(ypi/32) * cos(kpi/32) + cos(ypi/32) * sin(kpi/32)
104	// cos(x) = cos((k + y)pi/32)*
105	// = cos(ypi/32) * cos(kpi/32) - sin(ypi/32) * sin(kpi/32)
106	// The values of sin(kpi/32) and cos(kpi/32) for k = 0..63 are precomputed
107	// and stored using a vector of 32 doubles. Sin(ypi/32) and cos(ypi/32) are
108	// computed using degree-7 and degree-6 minimax polynomials generated by
109	// Sollya respectively.
110
111	// \|x\| < 0x1.0p-12f
112	if (LIBC_UNLIKELY(x_abs < `0x3980'0000U`)) {
113	if (LIBC_UNLIKELY(x_abs == `0U`)) {
114	// For signed zeros.
115	*sinp = x;
116	*cosp = `1.0f`;
117	return;
118	}
119	// When \|x\| < 2^-12, the relative errors of the approximations
120	// sin(x) ~ x, cos(x) ~ 1
121	// are:
122	// \|sin(x) - x\| / \|sin(x)\| < \|x^3\| / (6\|x\|)
123	// = x^2 / 6
124	// < 2^-25
125	// < epsilon(1)/2.
126	// \|cos(x) - 1\| < \|x^2 / 2\| = 2^-25 < epsilon(1)/2.
127	// So the correctly rounded values of sin(x) and cos(x) are:
128	// sin(x) = x - sign(x)eps(x) if rounding mode = FE_TOWARDZERO,*
129	// or (rounding mode = FE_UPWARD and x is
130	// negative),
131	// = x otherwise.
132	// cos(x) = 1 - eps(x) if rounding mode = FE_TOWARDZERO or FE_DOWWARD,
133	// = 1 otherwise.
134	// To simplify the rounding decision and make it more efficient and to
135	// prevent compiler to perform constant folding, we use
136	// sin(x) = fma(x, -2^-25, x),
137	// cos(x) = fma(x0.5f, -x, 1)*
138	// instead.
139	// Note: to use the formula x - 2^-25x to decide the correct rounding, we*
140	// do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25x when*
141	// \|x\| < 2^-125. For targets without FMA instructions, we simply use
142	// double for intermediate results as it is more efficient than using an
143	// emulated version of FMA.
144	#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
145	*sinp = fputil::multiply_add(x, -`0x1.0p-25f`, x);
146	*cosp = fputil::multiply_add(FPBits(x_abs).get_val(), -`0x1.0p-25f`, `1.0f`);
147	#else
148	sinp = static_cast<float*>(fputil::multiply_add(x: xd, y: -`0x1.0p-25`, z: xd));
149	cosp = static_cast<float*>(fputil::multiply_add(
150	x: static_cast<double>(FPBits (x_abs).get_val()), y: -`0x1.0p-25`, z: `1.0`));
151	#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
152	return;
153	}
154
155	// x is inf or nan.
156	if (LIBC_UNLIKELY(x_abs >= `0x7f80'0000U`)) {
157	if (xbits.is_signaling_nan()) {
158	fputil::raise_except_if_required(FE_INVALID);
159	sinp = cosp = FPBits::quiet_nan().get_val();
160	return;
161	}
162
163	if (x_abs == `0x7f80'0000U`) {
164	fputil::set_errno_if_required(EDOM);
165	fputil::raise_except_if_required(FE_INVALID);
166	}
167	*sinp = FPBits::quiet_nan().get_val();
168	cosp = sinp;
169	return;
170	}
171
172	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
173	// Check exceptional values.
174	for (int i = `0`; i < N_EXCEPTS; ++i) {
175	if (LIBC_UNLIKELY(x_abs == EXCEPT_INPUTS[i])) {
176	uint32_t s = EXCEPT_OUTPUTS_SIN[i][`0`]; // FE_TOWARDZERO
177	uint32_t c = EXCEPT_OUTPUTS_COS[i][`0`]; // FE_TOWARDZERO
178	bool x_sign = x < `0`;
179	#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
180	s += EXCEPT_OUTPUTS_SIN[i][`3`];
181	c += EXCEPT_OUTPUTS_COS[i][`3`];
182	#else
183	switch (fputil::quick_get_round()) {
184	case FE_UPWARD:
185	s += x_sign ? EXCEPT_OUTPUTS_SIN[i][`2`] : EXCEPT_OUTPUTS_SIN[i][`1`];
186	c += EXCEPT_OUTPUTS_COS[i][`1`];
187	break;
188	case FE_DOWNWARD:
189	s += x_sign ? EXCEPT_OUTPUTS_SIN[i][`1`] : EXCEPT_OUTPUTS_SIN[i][`2`];
190	c += EXCEPT_OUTPUTS_COS[i][`2`];
191	break;
192	case FE_TONEAREST:
193	s += EXCEPT_OUTPUTS_SIN[i][`3`];
194	c += EXCEPT_OUTPUTS_COS[i][`3`];
195	break;
196	}
197	#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
198	*sinp = x_sign ? -FPBits (s).get_val() : FPBits (s).get_val();
199	*cosp = FPBits (c).get_val();
200
201	return;
202	}
203	}
204	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
205
206	// Combine the results with the sine and cosine of sum formulas:
207	// sin(x) = sin((k + y)pi/32)*
208	// = sin(ypi/32) * cos(kpi/32) + cos(ypi/32) * sin(kpi/32)
209	// = sin_y cos_k + (1 + cosm1_y) * sin_k*
210	// = sin_y cos_k + (cosm1_y * sin_k + sin_k)*
211	// cos(x) = cos((k + y)pi/32)*
212	// = cos(ypi/32) * cos(kpi/32) - sin(ypi/32) * sin(kpi/32)
213	// = cosm1_y cos_k + sin_y * sin_k*
214	// = (cosm1_y cos_k + cos_k) + sin_y * sin_k*
215	double sin_k = `0`;
216	double cos_k = `0`;
217	double sin_y = `0`;
218	double cosm1_y = `0`;
219
220	sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
221
222	sinp = static_cast<float*>(fputil::multiply_add(
223	x: sin_y, y: cos_k, z: fputil::multiply_add(x: cosm1_y, y: sin_k, z: sin_k)));
224	cosp = static_cast<float*>(fputil::multiply_add(
225	x: sin_y, y: -sin_k, z: fputil::multiply_add(x: cosm1_y, y: cos_k, z: cos_k)));
226	}
227
228	} // namespace math
229
230	} // namespace LIBC_NAMESPACE_DECL
231
232	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_H
233

Browse the source code of llvm_projects/libc/src/__support/math/sincosf.h