1//===-- Implementation header for hypotf ------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_HYPOTF_H
10#define LLVM_LIBC_SRC___SUPPORT_MATH_HYPOTF_H
11
12#include "src/__support/FPUtil/FEnvImpl.h"
13#include "src/__support/FPUtil/FPBits.h"
14#include "src/__support/FPUtil/double_double.h"
15#include "src/__support/FPUtil/multiply_add.h"
16#include "src/__support/FPUtil/sqrt.h"
17#include "src/__support/common.h"
18#include "src/__support/macros/config.h"
19#include "src/__support/macros/optimization.h"
20
21namespace LIBC_NAMESPACE_DECL {
22
23namespace math {
24
25LIBC_INLINE float hypotf(float x, float y) {
26 using DoubleBits = fputil::FPBits<double>;
27 using FPBits = fputil::FPBits<float>;
28 using fputil::DoubleDouble;
29
30 uint32_t x_a = FPBits(x).uintval() & 0x7fff'ffff;
31 uint32_t y_a = FPBits(y).uintval() & 0x7fff'ffff;
32
33 // Note: replacing `x_a >= FPBits::EXP_MASK` with `x_bits.is_inf_or_nan()`
34 // generates extra exponent bit masking instructions on x86-64.
35 if (LIBC_UNLIKELY(x_a >= FPBits::EXP_MASK || y_a >= FPBits::EXP_MASK)) {
36 // x or y is inf or nan
37 FPBits x_bits(x);
38 FPBits y_bits(y);
39 if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan()) {
40 fputil::raise_except_if_required(FE_INVALID);
41 return FPBits::quiet_nan().get_val();
42 }
43 if (x_bits.is_inf() || y_bits.is_inf())
44 return FPBits::inf().get_val();
45 return x + y;
46 }
47
48 double xd = static_cast<double>(x);
49 double yd = static_cast<double>(y);
50
51 // x^2 and y^2 are exact in double precision.
52 double x_sq = xd * xd;
53
54 double sum_sq;
55#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
56 sum_sq = fputil::multiply_add(yd, yd, x_sq);
57#else
58 double y_sq = yd * yd;
59 sum_sq = x_sq + y_sq;
60#endif
61
62 // Take sqrt in double precision.
63 DoubleBits result(fputil::sqrt<double>(x: sum_sq));
64 double r = result.get_val();
65 float r_f = static_cast<float>(r);
66
67 // If any of the sticky bits of the result are non-zero, except the LSB, then
68 // the rounded result is correct.
69 uint64_t r_u = result.uintval();
70 uint32_t r_u32 = static_cast<uint32_t>(r_u);
71
72 if (LIBC_UNLIKELY(((r_u32 + 1) & 0x0FFF'FFFE) == 0)) {
73 // Almost all the sticky bits of the results are non-zero, extra checks are
74 // needed to make sure rounding is correct.
75
76 // Perform a quick check to see if the result rounded to float is already
77 // correct. Majority of hard-to-round cases fall in this case. If not, we
78 // will need to perform more expensive computations to get the correct error
79 // terms.
80 double r_d = static_cast<double>(r_f);
81 bool y_a_smaller = y_a < x_a;
82
83#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
84 // Compute the missing y_sq variable for FMA code path.
85 double y_sq = yd * yd;
86#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
87
88 double a = y_a_smaller ? x_sq : y_sq;
89 double b = y_a_smaller ? y_sq : x_sq;
90 double e = b - fputil::multiply_add(x: r_d, y: r_d, z: -a);
91 if (e == 0.0)
92 return r_f;
93
94 // Rounding correction is needed.
95 // The errors come from two parts:
96 // - rounding errors from sqrt(sum_sq) -> D(sum_sq)
97 // - rounding errors from x_sq + y_sq -> sum_sq
98 // We use FastTwoSum algorithm to compute those errors and then combine.
99#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
100 double sum_sq_lo = (b - (sum_sq - a));
101 double err = sum_sq_lo - fputil::multiply_add(r, r, -sum_sq);
102#else
103 fputil::DoubleDouble r_sq = fputil::exact_mult(a: r, b: r);
104 double sum_sq_lo = (b - (sum_sq - a));
105 double err = (sum_sq - r_sq.hi) + (sum_sq_lo - r_sq.lo);
106#endif
107
108 if (err > 0) {
109 r_u |= 1;
110 } else if ((err < 0) && ((r_u32 & 0x0FFF'FFFF) == 0)) {
111 r_u -= 1;
112 }
113
114 return static_cast<float>(DoubleBits(r_u).get_val());
115 }
116
117 return r_f;
118}
119
120} // namespace math
121
122} // namespace LIBC_NAMESPACE_DECL
123
124#endif // LLVM_LIBC_SRC___SUPPORT_MATH_HYPOTF_H
125