hypot.h source code [llvm_projects/libc/src/__support/math/hypot.h]

1	//===-- Implementation header for hypot -------------------------*- C++ -*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_HYPOT_H
10	#define LLVM_LIBC_SRC___SUPPORT_MATH_HYPOT_H
11
12	#include "src/__support/FPUtil/FEnvImpl.h"
13	#include "src/__support/FPUtil/FPBits.h"
14	#include "src/__support/FPUtil/Hypot.h"
15	#include "src/__support/FPUtil/double_double.h"
16	#include "src/__support/FPUtil/multiply_add.h"
17	#include "src/__support/FPUtil/sqrt.h"
18	#include "src/__support/common.h"
19	#include "src/__support/macros/config.h"
20	#include "src/__support/macros/optimization.h"
21
22	namespace LIBC_NAMESPACE_DECL {
23	namespace math {
24
25	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
26
27	namespace hypot_internal {
28
29	// Both input are denormals and non-zero. We scale them up and down by the
30	// inverse of the smallest normal number: 2^-1022, so that the correct "hidden"
31	// bit position is now 1. And by adding and subtracting 1 from the results, we
32	// can emulate the rounding behavior in the denormal range.
33	LIBC_INLINE double hypot_denorm(double a, double b) {
34	using fputil::DoubleDouble;
35	constexpr double SCALE = `0x1.0p1022`;
36	constexpr double SCALE_BACK = `0x1.0p-1022`;
37
38	a *= SCALE;
39	b *= SCALE;
40
41	// See the comments in the main function for the detail explanation of the
42	// computations.
43
44	// sum.hi + sum.lo ~ a^2 + b^2.
45	DoubleDouble a_sq = fputil::exact_mult(a, b: a);
46	DoubleDouble b_sq = fputil::exact_mult(a: b, b);
47	DoubleDouble sum = fputil::exact_add(a: a_sq.hi, b: b_sq.hi);
48	sum.lo += a_sq.lo + b_sq.lo;
49
50	// \|sqrt(sum.hi) - r_hi\| < 2^-52.
51	double r_hi = fputil::sqrt<double>(x: sum.hi);
52	// r_inv ~ 1 / (2 r_hi)*
53	double r_inv = `0.5` / r_hi;
54	// Adjust correction if needed.
55	DoubleDouble r_h{.lo: `0.0`, .hi: r_hi};
56	double correction = `0.0`;
57	if (r_hi < `1.0`) {
58	// When r_hi < 1, the output is denormal. We mimick rounding in denormal
59	// range with 1.0 + r_hi.
60	r_h = fputil::exact_add(a: `1.0`, b: r_hi);
61	correction = `1.0`;
62	}
63	// r_hi^2
64	DoubleDouble r_sq = fputil::exact_mult(a: r_hi, b: r_hi);
65	// (hi + lo - r_hi^2)
66	double num_lo = (sum.lo - r_sq.lo) - (r_sq.hi - sum.hi);
67	// (hi + lo - r_hi^2) / (2 r_hi)*
68	double r_lo = fputil::multiply_add(x: num_lo, y: r_inv, z: r_h.lo);
69
70	constexpr double ERR = `0x1.0p-102`;
71
72	// Ziv's rounding test.
73	double upper = r_h.hi + (r_lo + ERR);
74	double lower = r_h.hi + (r_lo - ERR);
75
76	if (LIBC_LIKELY(upper == lower)) {
77	#ifdef LIBC_MATH_HAS_NO_EXCEPT
78	return (upper - correction) * SCALE_BACK;
79	#else
80	// Check to raise underflow correctly.
81	DoubleDouble r = fputil::exact_add(r_h.hi, r_lo);
82	r.hi -= correction;
83	// Raise underflow if needed:
84	if ((r.hi < `1.0` && r.lo != `0.0`) \|\| (r.hi == `1.0` && r.lo < `0.0`))
85	fputil::raise_except_if_required(FE_UNDERFLOW \| FE_INEXACT);
86
87	return r.hi * SCALE_BACK;
88	#endif // LIBC_MATH_HAS_NO_EXCEPT
89	}
90
91	return fputil::hypot(x: a * SCALE_BACK, y: b * SCALE_BACK);
92	}
93
94	} // namespace hypot_internal
95
96	#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
97
98	LIBC_INLINE double hypot(double x, double y) {
99	using FPBits = fputil::FPBits<double>;
100	using DoubleDouble = fputil::DoubleDouble;
101
102	uint64_t x_u = FPBits (x).uintval();
103	uint64_t y_u = FPBits (y).uintval();
104
105	// Shift the exponent field to the top 11 bits of the lower 32-bit.
106	// Casting it to 32-bit effectively remove the sign bit.
107	uint32_t x_e = static_cast<uint32_t>(x_u >> `31`);
108	uint32_t y_e = static_cast<uint32_t>(y_u >> `31`);
109
110	// a = maximum_mag(x, y);
111	// b = minimum_mag(x, y);
112	double a, b;
113	uint32_t a_e, b_e;
114
115	if (x_e >= y_e) {
116	a_e = x_e;
117	b_e = y_e;
118	a = x;
119	b = y;
120	} else {
121	a_e = y_e;
122	b_e = x_e;
123	a = y;
124	b = x;
125	}
126
127	double scale = `1.0`;
128	double scale_back = `1.0`;
129
130	// For a_e, b_e, the top 11 bits are exponent fields.
131	if (LIBC_UNLIKELY(a_e >= ((`500U` + FPBits::EXP_BIAS) << (`32` - `11`)))) {
132	// The larger magnitude is above 2^500 (or Inf/NaN), need to scale down to
133	// prevent overflow when squaring.
134	if (a_e >= static_cast<uint32_t>(FPBits::EXP_MASK >> `31`)) {
135	// Inf or NaN;
136	FPBits x_bits(x);
137	FPBits y_bits(y);
138	if (x_bits.is_signaling_nan() \|\| y_bits.is_signaling_nan()) {
139	fputil::raise_except_if_required(FE_INVALID);
140	return FPBits::quiet_nan().get_val();
141	}
142	if (x_bits.is_inf() \|\| y_bits.is_inf())
143	return FPBits::inf().get_val();
144	if (x_bits.is_nan())
145	return x;
146	return y;
147	}
148
149	// Check the exponent gap here so that all the follow up pre-scaling and
150	// overflow check won't generate spurious underflow exceptions.
151	if (LIBC_UNLIKELY(a_e - b_e >= (`54U` << (`32` - `11`)))) {
152	double x_abs = FPBits (x_u & FPBits::EXP_SIG_MASK).get_val();
153	double y_abs = FPBits (y_u & FPBits::EXP_SIG_MASK).get_val();
154	return x_abs + y_abs;
155	}
156	// Any scaling factor < 2^(-1024/2) = 2^-512 would work.
157	scale = `0x1.0p-600`;
158	scale_back = `0x1.0p600`;
159	a *= scale;
160	b *= scale;
161	// Check for overflow to raise the exception correctly.
162	#if !defined(LIBC_MATH_HAS_NO_EXCEPT)
163	// No overflow when calculating a^2 + b^2.
164	double asq = a * a;
165	double bsq = b * b;
166	double sumsq = asq + bsq;
167	// Overflow happens when:
168	// 2^600 sqrt(a^2 + b^2) >= 2^1023 * (2 - 2^-53)*
169	// Which is equivalent to:
170	// sqrt(a^2 + b^2) >= 2^424 (1 - 2^-54).*
171	// Square both sides:
172	// a^2 + b^2 >= 2^848 (1 - 2^-53 + 2^-108).*
173	// For a fast sufficient condition that can be done in double precision:
174	// a^2 + b^2 >= 2^848.
175	if (sumsq >= `0x1.0p848`)
176	return sumsq * scale_back;
177	#endif // !LIBC_MATH_HAS_NO_EXCEPT
178	} else if (LIBC_UNLIKELY(b_e <= ((FPBits::EXP_BIAS - `400`) << (`32` - `11`)))) {
179	// The smaller magnitude is below 2^-400 (or 0), need to scale up to prevent
180	// underflow when squaring.
181	if (LIBC_UNLIKELY(a_e < (`1U` << (`32` - `11`)))) {
182	// Larger input is denormal, extra care is needed to perform the Ziv's
183	// accuracy test correctly as double-rounding errors might happen.
184	if ((x == `0.0`) \|\| (y == `0.0`)) {
185	double x_abs = FPBits (x_u & FPBits::EXP_SIG_MASK).get_val();
186	double y_abs = FPBits (y_u & FPBits::EXP_SIG_MASK).get_val();
187	return x_abs + y_abs;
188	}
189	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
190	return hypot_internal::hypot_denorm(a, b);
191	#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
192	}
193	// Any scaling factor > 2^((1072 + 52)/2) = 2^562 would work.
194	scale = `0x1.0p600`;
195	scale_back = `0x1.0p-600`;
196	a *= scale;
197	b *= scale;
198	}
199
200	// When the gap in the exponent of `a` and `b` is >= 54,
201	// \|b\| < ufp(a) 2^(-53) = ulp(a)/2*
202	// So:
203	// hypot(x, y) = sqrt(a^2 + b^2)
204	// <= sqrt( (\|a\| + \|b\|)^2 )
205	// = \|a\| + \|b\|
206	// < \|a\| + ulp(a)
207	// Hence, we can return:
208	// \|a\| + \|b\| = \|x\| + \|y\|
209	// to perform correct rounding to all rounding modes.
210	if (LIBC_UNLIKELY(a_e - b_e >= (`54U` << (`32` - `11`)))) {
211	double x_abs = FPBits (x_u & FPBits::EXP_SIG_MASK).get_val();
212	double y_abs = FPBits (y_u & FPBits::EXP_SIG_MASK).get_val();
213	return x_abs + y_abs;
214	}
215
216	// sum.hi + sum.lo ~ a^2 + b^2.
217	DoubleDouble a_sq = fputil::exact_mult(a, b: a);
218	DoubleDouble b_sq = fputil::exact_mult(a: b, b);
219	DoubleDouble sum = fputil::exact_add(a: a_sq.hi, b: b_sq.hi);
220	sum.lo += a_sq.lo + b_sq.lo;
221
222	// Let hi = sum.hi and lo = sum.lo.
223	// To compute r_hi + r_lo ~ sqrt(hi + lo):
224	// - First we use fast sqrt instruction to get:
225	// r_hi ~ sqrt(hi)
226	// - Then use Taylor expansion:
227	// f(hi + lo) = f(hi) + f'(hi) lo + f''(hi) * lo^2 / 2 + ...*
228	// with f(x) = sqrt(x):
229	// sqrt(hi + lo) ~ sqrt(hi) + lo / (2 sqrt(hi)).*
230	// - Subtract by r_hi to find the correction term:
231	// sqrt(hi + lo) - r_hi ~ (sqrt(hi) - r_hi) + lo / (2 sqrt(hi))*
232	// - Instead of finding the rounding errors sqrt(hi) - r_hi, we use the
233	// squared residual d = hi - r_hi^2, which can be calculated accurately in
234	// double-double. Then, using the same Taylor approximation of sqrt(x) as
235	// above:
236	// sqrt(hi) - r_hi = sqrt(r_hi^2 + d) - r_hi
237	// ~ sqrt(r_hi^2) + d / (2 sqrt(r_hi^2)) - r_hi*
238	// = d / (2 r_hi).*
239	// - Similarly,
240	// 1 / sqrt(hi) = 1 / sqrt(r_hi^2 + d)
241	// ~ 1 / sqrt(r_hi^2) - d / (2 (r_hi^2)^(3/2))*
242	// = 1 / r_hi - d / (2 r_hi^3)*
243	// - Putting them together, we have the correction term:
244	// sqrt(hi + lo) - r_hi + lo / (2 sqrt(hi)) ~*
245	// ~ (lo + d) / (2 r_hi) + lo * d / (4 * r_hi^3)*
246	// ~ (hi + lo - r_hi^2) / (2 r_hi).*
247	// - When computing hi + lo - r_hi^2, we will pair (hi - r_sq.hi) and
248	// (lo - r_sq.lo), since `r_sq.hi` is very close to `hi`, and the
249	// subtraction is exact.
250	// - Taking intermediate roundings with directed rounding modes into
251	// consideration, the overall errors should be bounded by
252	// (2^-51)^2 = 2^-102.
253
254	// \|sqrt(sum.hi) - r_hi\| < 2^-52.
255	double r_hi = fputil::sqrt<double>(x: sum.hi);
256	// r_inv ~ 1 / (2 r_hi)*
257	double r_inv = `0.5` / r_hi;
258	// r_hi^2
259	DoubleDouble r_sq = fputil::exact_mult(a: r_hi, b: r_hi);
260	// (hi + lo - r_hi^2)
261	double num_lo = (sum.lo - r_sq.lo) - (r_sq.hi - sum.hi);
262	// (hi + lo - r_hi^2) / (2 r_hi)*
263	double r_lo = num_lo * r_inv;
264
265	#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
266	// TODO: What's the worst error if we just do:
267	// return sqrt(aa + bb) scale_back;*
268	// without all the double-double computations?
269	return (r_hi + r_lo) * scale_back;
270	#else
271	constexpr double ERR = `0x1.0p-102`;
272
273	// Ziv's rounding test.
274	double upper = r_hi + fputil::multiply_add(x: r_hi, y: ERR, z: r_lo);
275	double lower = r_hi + fputil::multiply_add(x: r_hi, y: -ERR, z: r_lo);
276
277	if (LIBC_LIKELY(upper == lower)) {
278	return upper * scale_back;
279	}
280
281	return fputil::hypot(x, y);
282	#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
283	}
284
285	} // namespace math
286	} // namespace LIBC_NAMESPACE_DECL
287
288	#endif // LLVM_LIBC_SRC___SUPPORT_MATH_HYPOT_H
289

Browse the source code of llvm_projects/libc/src/__support/math/hypot.h