1//===-- Implementation header for sincosf ------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_H
11
12#include "sincosf_utils.h"
13#include "src/__support/FPUtil/FEnvImpl.h"
14#include "src/__support/FPUtil/FPBits.h"
15#include "src/__support/FPUtil/multiply_add.h"
16#include "src/__support/FPUtil/rounding_mode.h"
17#include "src/__support/common.h"
18#include "src/__support/macros/config.h"
19#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
20#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
21
22namespace LIBC_NAMESPACE_DECL {
23
24namespace math {
25
26namespace sincosf_internal {
27
28#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
29// Exceptional values
30LIBC_INLINE_VAR constexpr int N_EXCEPTS = 6;
31
32LIBC_INLINE_VAR constexpr uint32_t EXCEPT_INPUTS[N_EXCEPTS] = {
33 0x46199998, // x = 0x1.33333p13 x
34 0x55325019, // x = 0x1.64a032p43 x
35 0x5922aa80, // x = 0x1.4555p51 x
36 0x5f18b878, // x = 0x1.3170fp63 x
37 0x6115cb11, // x = 0x1.2b9622p67 x
38 0x7beef5ef, // x = 0x1.ddebdep120 x
39};
40
41LIBC_INLINE_VAR constexpr uint32_t EXCEPT_OUTPUTS_SIN[N_EXCEPTS][4] = {
42 {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ)
43 {0xbf171adf, 0, 1, 1}, // x = 0x1.64a032p43, sin(x) = -0x1.2e35bep-1 (RZ)
44 {0xbf587521, 0, 1, 1}, // x = 0x1.4555p51, sin(x) = -0x1.b0ea42p-1 (RZ)
45 {0x3dad60f6, 1, 0, 1}, // x = 0x1.3170fp63, sin(x) = 0x1.5ac1ecp-4 (RZ)
46 {0xbe7cc1e0, 0, 1, 1}, // x = 0x1.2b9622p67, sin(x) = -0x1.f983cp-3 (RZ)
47 {0xbf587d1b, 0, 1, 1}, // x = 0x1.ddebdep120, sin(x) = -0x1.b0fa36p-1 (RZ)
48};
49
50LIBC_INLINE_VAR constexpr uint32_t EXCEPT_OUTPUTS_COS[N_EXCEPTS][4] = {
51 {0xbf70090b, 0, 1, 0}, // x = 0x1.33333p13, cos(x) = -0x1.e01216p-1 (RZ)
52 {0x3f4ea5d2, 1, 0, 0}, // x = 0x1.64a032p43, cos(x) = 0x1.9d4ba4p-1 (RZ)
53 {0x3f08aebe, 1, 0, 1}, // x = 0x1.4555p51, cos(x) = 0x1.115d7cp-1 (RZ)
54 {0x3f7f14bb, 1, 0, 0}, // x = 0x1.3170fp63, cos(x) = 0x1.fe2976p-1 (RZ)
55 {0x3f78142e, 1, 0, 1}, // x = 0x1.2b9622p67, cos(x) = 0x1.f0285cp-1 (RZ)
56 {0x3f08a21c, 1, 0, 0}, // x = 0x1.ddebdep120, cos(x) = 0x1.114438p-1 (RZ)
57};
58#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
59} // namespace sincosf_internal
60
61LIBC_INLINE void sincosf(float x, float *sinp, float *cosp) {
62 using namespace sincosf_internal;
63 using namespace sincosf_utils_internal;
64 using FPBits = typename fputil::FPBits<float>;
65 FPBits xbits(x);
66
67 uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
68 double xd = static_cast<double>(x);
69
70 // Range reduction:
71 // For |x| >= 2^-12, we perform range reduction as follows:
72 // Find k and y such that:
73 // x = (k + y) * pi/32
74 // k is an integer
75 // |y| < 0.5
76 // For small range (|x| < 2^45 when FMA instructions are available, 2^22
77 // otherwise), this is done by performing:
78 // k = round(x * 32/pi)
79 // y = x * 32/pi - k
80 // For large range, we will omit all the higher parts of 32/pi such that the
81 // least significant bits of their full products with x are larger than 63,
82 // since:
83 // sin((k + y + 64*i) * pi/32) = sin(x + i * 2pi) = sin(x), and
84 // cos((k + y + 64*i) * pi/32) = cos(x + i * 2pi) = cos(x).
85 //
86 // When FMA instructions are not available, we store the digits of 32/pi in
87 // chunks of 28-bit precision. This will make sure that the products:
88 // x * THIRTYTWO_OVER_PI_28[i] are all exact.
89 // When FMA instructions are available, we simply store the digits of326/pi in
90 // chunks of doubles (53-bit of precision).
91 // So when multiplying by the largest values of single precision, the
92 // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the
93 // worst-case analysis of range reduction, |y| >= 2^-38, so this should give
94 // us more than 40 bits of accuracy. For the worst-case estimation of range
95 // reduction, see for instances:
96 // Elementary Functions by J-M. Muller, Chapter 11,
97 // Handbook of Floating-Point Arithmetic by J-M. Muller et. al.,
98 // Chapter 10.2.
99 //
100 // Once k and y are computed, we then deduce the answer by the sine and cosine
101 // of sum formulas:
102 // sin(x) = sin((k + y)*pi/32)
103 // = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
104 // cos(x) = cos((k + y)*pi/32)
105 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
106 // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..63 are precomputed
107 // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
108 // computed using degree-7 and degree-6 minimax polynomials generated by
109 // Sollya respectively.
110
111 // |x| < 0x1.0p-12f
112 if (LIBC_UNLIKELY(x_abs < 0x3980'0000U)) {
113 if (LIBC_UNLIKELY(x_abs == 0U)) {
114 // For signed zeros.
115 *sinp = x;
116 *cosp = 1.0f;
117 return;
118 }
119 // When |x| < 2^-12, the relative errors of the approximations
120 // sin(x) ~ x, cos(x) ~ 1
121 // are:
122 // |sin(x) - x| / |sin(x)| < |x^3| / (6|x|)
123 // = x^2 / 6
124 // < 2^-25
125 // < epsilon(1)/2.
126 // |cos(x) - 1| < |x^2 / 2| = 2^-25 < epsilon(1)/2.
127 // So the correctly rounded values of sin(x) and cos(x) are:
128 // sin(x) = x - sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
129 // or (rounding mode = FE_UPWARD and x is
130 // negative),
131 // = x otherwise.
132 // cos(x) = 1 - eps(x) if rounding mode = FE_TOWARDZERO or FE_DOWWARD,
133 // = 1 otherwise.
134 // To simplify the rounding decision and make it more efficient and to
135 // prevent compiler to perform constant folding, we use
136 // sin(x) = fma(x, -2^-25, x),
137 // cos(x) = fma(x*0.5f, -x, 1)
138 // instead.
139 // Note: to use the formula x - 2^-25*x to decide the correct rounding, we
140 // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when
141 // |x| < 2^-125. For targets without FMA instructions, we simply use
142 // double for intermediate results as it is more efficient than using an
143 // emulated version of FMA.
144#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
145 *sinp = fputil::multiply_add(x, -0x1.0p-25f, x);
146 *cosp = fputil::multiply_add(FPBits(x_abs).get_val(), -0x1.0p-25f, 1.0f);
147#else
148 *sinp = static_cast<float>(fputil::multiply_add(x: xd, y: -0x1.0p-25, z: xd));
149 *cosp = static_cast<float>(fputil::multiply_add(
150 x: static_cast<double>(FPBits(x_abs).get_val()), y: -0x1.0p-25, z: 1.0));
151#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
152 return;
153 }
154
155 // x is inf or nan.
156 if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) {
157 if (xbits.is_signaling_nan()) {
158 fputil::raise_except_if_required(FE_INVALID);
159 *sinp = *cosp = FPBits::quiet_nan().get_val();
160 return;
161 }
162
163 if (x_abs == 0x7f80'0000U) {
164 fputil::set_errno_if_required(EDOM);
165 fputil::raise_except_if_required(FE_INVALID);
166 }
167 *sinp = FPBits::quiet_nan().get_val();
168 *cosp = *sinp;
169 return;
170 }
171
172#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
173 // Check exceptional values.
174 for (int i = 0; i < N_EXCEPTS; ++i) {
175 if (LIBC_UNLIKELY(x_abs == EXCEPT_INPUTS[i])) {
176 uint32_t s = EXCEPT_OUTPUTS_SIN[i][0]; // FE_TOWARDZERO
177 uint32_t c = EXCEPT_OUTPUTS_COS[i][0]; // FE_TOWARDZERO
178 bool x_sign = x < 0;
179#ifdef LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
180 s += EXCEPT_OUTPUTS_SIN[i][3];
181 c += EXCEPT_OUTPUTS_COS[i][3];
182#else
183 switch (fputil::quick_get_round()) {
184 case FE_UPWARD:
185 s += x_sign ? EXCEPT_OUTPUTS_SIN[i][2] : EXCEPT_OUTPUTS_SIN[i][1];
186 c += EXCEPT_OUTPUTS_COS[i][1];
187 break;
188 case FE_DOWNWARD:
189 s += x_sign ? EXCEPT_OUTPUTS_SIN[i][1] : EXCEPT_OUTPUTS_SIN[i][2];
190 c += EXCEPT_OUTPUTS_COS[i][2];
191 break;
192 case FE_TONEAREST:
193 s += EXCEPT_OUTPUTS_SIN[i][3];
194 c += EXCEPT_OUTPUTS_COS[i][3];
195 break;
196 }
197#endif // LIBC_MATH_HAS_ASSUME_ROUND_NEAREST_ONLY
198 *sinp = x_sign ? -FPBits(s).get_val() : FPBits(s).get_val();
199 *cosp = FPBits(c).get_val();
200
201 return;
202 }
203 }
204#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
205
206 // Combine the results with the sine and cosine of sum formulas:
207 // sin(x) = sin((k + y)*pi/32)
208 // = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
209 // = sin_y * cos_k + (1 + cosm1_y) * sin_k
210 // = sin_y * cos_k + (cosm1_y * sin_k + sin_k)
211 // cos(x) = cos((k + y)*pi/32)
212 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
213 // = cosm1_y * cos_k + sin_y * sin_k
214 // = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
215 double sin_k = 0;
216 double cos_k = 0;
217 double sin_y = 0;
218 double cosm1_y = 0;
219
220 sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
221
222 *sinp = static_cast<float>(fputil::multiply_add(
223 x: sin_y, y: cos_k, z: fputil::multiply_add(x: cosm1_y, y: sin_k, z: sin_k)));
224 *cosp = static_cast<float>(fputil::multiply_add(
225 x: sin_y, y: -sin_k, z: fputil::multiply_add(x: cosm1_y, y: cos_k, z: cos_k)));
226}
227
228} // namespace math
229
230} // namespace LIBC_NAMESPACE_DECL
231
232#endif // LLVM_LIBC_SRC___SUPPORT_MATH_SINCOSF_H
233