1//===-- Implementation header for hypot -------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_HYPOT_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_HYPOT_H
11
12#include "src/__support/FPUtil/FEnvImpl.h"
13#include "src/__support/FPUtil/FPBits.h"
14#include "src/__support/FPUtil/Hypot.h"
15#include "src/__support/FPUtil/double_double.h"
16#include "src/__support/FPUtil/multiply_add.h"
17#include "src/__support/FPUtil/sqrt.h"
18#include "src/__support/common.h"
19#include "src/__support/macros/config.h"
20#include "src/__support/macros/optimization.h"
21
22namespace LIBC_NAMESPACE_DECL {
23namespace math {
24
25LIBC_INLINE double hypot(double x, double y) {
26 using FPBits = fputil::FPBits<double>;
27 using DoubleDouble = fputil::DoubleDouble;
28
29 uint64_t x_u = FPBits(x).uintval();
30 uint64_t y_u = FPBits(y).uintval();
31
32 // Shift the exponent field to the top 11 bits of the lower 32-bit.
33 // Casting it to 32-bit effectively remove the sign bit.
34 uint32_t x_e = static_cast<uint32_t>(x_u >> 31);
35 uint32_t y_e = static_cast<uint32_t>(y_u >> 31);
36
37 // a = maximum_mag(x, y);
38 // b = minimum_mag(x, y);
39 double a, b;
40 uint32_t a_e, b_e;
41
42 if (x_e >= y_e) {
43 a_e = x_e;
44 b_e = y_e;
45 a = x;
46 b = y;
47 } else {
48 a_e = y_e;
49 b_e = x_e;
50 a = y;
51 b = x;
52 }
53
54 double scale = 1.0;
55 double scale_back = 1.0;
56
57 // For a_e, b_e, the top 11 bits are exponent fields.
58 if (LIBC_UNLIKELY(a_e >= ((500U + FPBits::EXP_BIAS) << (32 - 11)))) {
59 // The larger magnitude is above 2^500 (or Inf/NaN), need to scale down to
60 // prevent overflow when squaring.
61 if (a_e >= static_cast<uint32_t>(FPBits::EXP_MASK >> 31)) {
62 // Inf or NaN;
63 FPBits x_bits(x);
64 FPBits y_bits(y);
65 if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan()) {
66 fputil::raise_except_if_required(FE_INVALID);
67 return FPBits::quiet_nan().get_val();
68 }
69 if (x_bits.is_inf() || y_bits.is_inf())
70 return FPBits::inf().get_val();
71 if (x_bits.is_nan())
72 return x;
73 return y;
74 }
75 // Any scaling factor < 2^(-1024/2) = 2^-512 would work.
76 scale = 0x1.0p-600;
77 scale_back = 0x1.0p600;
78 a *= scale;
79 b *= scale;
80 } else if (LIBC_UNLIKELY(b_e <= ((FPBits::EXP_BIAS - 500) << (32 - 11)))) {
81 // The smaller magnitude is below 2^-500 (or 0), need to scale up to prevent
82 // underflow when squaring.
83 if ((x == 0.0) || (y == 0.0)) {
84 double x_abs = FPBits(x_u & FPBits::EXP_SIG_MASK).get_val();
85 double y_abs = FPBits(y_u & FPBits::EXP_SIG_MASK).get_val();
86 return x_abs + y_abs;
87 }
88 // Any scaling factor > 2^((1072 + 52)/2) = 2^562 would work.
89 scale = 0x1.0p600;
90 scale_back = 0x1.0p-600;
91 a *= scale;
92 b *= scale;
93 }
94
95 // When the gap in the exponent of `a` and `b` is >= 54,
96 // |b| < ufp(a) * 2^(-53) = ulp(a)/2
97 // So:
98 // hypot(x, y) = sqrt(a^2 + b^2)
99 // <= sqrt( (|a| + |b|)^2 )
100 // = |a| + |b|
101 // < |a| + ulp(a)
102 // Hence, we can return:
103 // |a| + |b| = |x| + |y|
104 // to perform correct rounding to all rounding modes.
105 if (LIBC_UNLIKELY(a_e - b_e >= (54U << (32 - 11)))) {
106 double x_abs = FPBits(x_u & FPBits::EXP_SIG_MASK).get_val();
107 double y_abs = FPBits(y_u & FPBits::EXP_SIG_MASK).get_val();
108 return x_abs + y_abs;
109 }
110
111 // sum.hi + sum.lo ~ a^2 + b^2.
112 DoubleDouble a_sq = fputil::exact_mult(a, b: a);
113 DoubleDouble b_sq = fputil::exact_mult(a: b, b);
114 DoubleDouble sum = fputil::exact_add(a: a_sq.hi, b: b_sq.hi);
115 sum.lo += a_sq.lo + b_sq.lo;
116
117 // Let hi = sum.hi and lo = sum.lo.
118 // To compute r_hi + r_lo ~ sqrt(hi + lo):
119 // - First we use fast sqrt instruction to get:
120 // r_hi ~ sqrt(hi)
121 // - Then use Taylor expansion:
122 // f(hi + lo) = f(hi) + f'(hi) * lo + f''(hi) * lo^2 / 2 + ...
123 // with f(x) = sqrt(x):
124 // sqrt(hi + lo) ~ sqrt(hi) + lo / (2 * sqrt(hi)).
125 // - Subtract by r_hi to find the correction term:
126 // sqrt(hi + lo) - r_hi ~ (sqrt(hi) - r_hi) + lo / (2 * sqrt(hi))
127 // - Instead of finding the rounding errors sqrt(hi) - r_hi, we use the
128 // squared residual d = hi - r_hi^2, which can be calculated accurately in
129 // double-double. Then, using the same Taylor approximation of sqrt(x) as
130 // above:
131 // sqrt(hi) - r_hi = sqrt(r_hi^2 + d) - r_hi
132 // ~ sqrt(r_hi^2) + d / (2 * sqrt(r_hi^2)) - r_hi
133 // = d / (2 * r_hi).
134 // - Similarly,
135 // 1 / sqrt(hi) = 1 / sqrt(r_hi^2 + d)
136 // ~ 1 / sqrt(r_hi^2) - d / (2 * (r_hi^2)^(3/2))
137 // = 1 / r_hi - d / (2 * r_hi^3)
138 // - Putting them together, we have the correction term:
139 // sqrt(hi + lo) - r_hi + lo / (2 * sqrt(hi)) ~
140 // ~ (lo + d) / (2 * r_hi) + lo * d / (4 * r_hi^3)
141 // ~ (hi + lo - r_hi^2) / (2 * r_hi).
142 // - When computing hi + lo - r_hi^2, we will pair (hi - r_sq.hi) and
143 // (lo - r_sq.lo), since `r_sq.hi` is very close to `hi`, and the
144 // subtraction is exact.
145 // - Taking intermediate roundings with directed rounding modes into
146 // consideration, the overall errors should be bounded by
147 // (2^-51)^2 = 2^-102.
148
149 // |sqrt(sum.hi) - r_hi| < 2^-52.
150 double r_hi = fputil::sqrt<double>(x: sum.hi);
151 // r_inv ~ 1 / (2 * r_hi)
152 double r_inv = 0.5 / r_hi;
153 // r_hi^2
154 DoubleDouble r_sq = fputil::exact_mult(a: r_hi, b: r_hi);
155 // (hi + lo - r_hi^2)
156 double num_lo = (sum.lo - r_sq.lo) - (r_sq.hi - sum.hi);
157 // (hi + lo - r_hi^2) / (2 * r_hi)
158 double r_lo = num_lo * r_inv;
159
160#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
161 // TODO: What's the worst error if we just do:
162 // return sqrt(a*a + b*b) * scale_back;
163 // without all the double-double computations?
164 return (r_hi + r_lo) * scale_back;
165#else
166 constexpr double ERR = 0x1.0p-102;
167
168 // Ziv's rounding test.
169 double upper = r_hi + fputil::multiply_add(x: r_hi, y: ERR, z: r_lo);
170 double lower = r_hi + fputil::multiply_add(x: r_hi, y: -ERR, z: r_lo);
171
172 if (LIBC_LIKELY(upper == lower)) {
173 return upper * scale_back;
174 }
175
176 return fputil::hypot(x, y);
177#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
178}
179
180} // namespace math
181} // namespace LIBC_NAMESPACE_DECL
182
183#endif // LLVM_LIBC_SRC___SUPPORT_MATH_HYPOT_H
184