double_double.h source code [llvm_projects/libc/src/__support/FPUtil/double_double.h]

1	//===-- Utilities for double-double data type. ------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
10	#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
11
12	#include "multiply_add.h"
13	#include "src/__support/common.h"
14	#include "src/__support/macros/config.h"
15	#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
16	#include "src/__support/number_pair.h"
17
18	namespace LIBC_NAMESPACE_DECL {
19	namespace fputil {
20
21	template <typename T> struct DefaultSplit;
22	template <> struct DefaultSplit<float> {
23	static constexpr size_t VALUE = `12`;
24	};
25	template <> struct DefaultSplit<double> {
26	static constexpr size_t VALUE = `27`;
27	};
28
29	using DoubleDouble = NumberPair<double>;
30	using FloatFloat = NumberPair<float>;
31
32	// Dekker's FastTwoSum:
33	// r_hi + r_lo ~ a + b
34	// In precision `p`, it's exact if `lsb(a) >= ulp(b)` and one of the following
35	// conditions satisfies:
36	// (i) rounding mode = RN
37	// (ii) e_a - e_b <= p
38	// (iii) rounding mode = RD and b >= 0
39	// (iv) rounding mode = RU and b <= 0
40	// (v) rounding mode = RZ and ab >= 0
41	// Otherwise, the errors err = (a + b) - (r_hi + r_lo) is bounded by:
42	// \|err\| <= 2^(-2p + 1) ufp(a + b) <= 2^(-2p + 1) * ufp(r_hi)*
43	// when `lsb(a) >= ulp(b)`.
44	// In particular,
45	// \|err\| <= 2^-47 ufp(r_hi) for single precision,*
46	// <= 2^-105 ufp(r_hi) for double precision.*
47	// If the condition `lsb(a) >= ulp(b)` does NOT satisfy, then:
48	// \|err\| < 3 2^(-p) * \|r_hi\|.*
49	// Reference:
50	// Jeannerod, C.-P. and Zimmermann, P., "FastTwoSum revisited", ARITH 2025,
51	// <https://www.arith2025.org/proceedings/215900a141.pdf>.
52	template <bool FAST2SUM = true, typename T = double>
53	LIBC_INLINE constexpr NumberPair<T> exact_add(T a, T b) {
54	NumberPair<T> r{`0.0`, `0.0`};
55	if constexpr (FAST2SUM) {
56	r.hi = a + b;
57	T t = r.hi - a;
58	r.lo = b - t;
59	} else {
60	r.hi = a + b;
61	T t1 = r.hi - a;
62	T t2 = r.hi - t1;
63	T t3 = b - t1;
64	T t4 = a - t2;
65	r.lo = t3 + t4;
66	}
67	return r;
68	}
69
70	// Following the above analysis of FastTwoSum,
71	// If `lsb(a.hi) >= ulp(b.hi)`, then the errors:
72	// err = (a.hi + a.lo + b.hi + b.lo) - (r.hi - r.lo) is bounded by:
73	// \|err\| < 2^(-2p) ufp(r.hi) + 2^(-p + 2) * ufp(a.lo + b.lo).*
74	template <typename T>
75	LIBC_INLINE constexpr NumberPair<T> add(const NumberPair<T> &a,
76	const NumberPair<T> &b) {
77	NumberPair<T> r = exact_add(a.hi, b.hi);
78	T lo = a.lo + b.lo;
79	return exact_add(r.hi, r.lo + lo);
80	}
81
82	// Assumption: \|a.hi\| >= \|b\|
83	template <typename T>
84	LIBC_INLINE constexpr NumberPair<T> add(const NumberPair<T> &a, T b) {
85	NumberPair<T> r = exact_add<false>(a.hi, b);
86	return exact_add(r.hi, r.lo + a.lo);
87	}
88
89	// Veltkamp's Splitting for double precision.
90	// Note: This is proved to be correct for all rounding modes:
91	// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
92	// Roundings," https://inria.hal.science/hal-04480440.
93	// Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
94	template <typename T = double, size_t N = DefaultSplit<T>::VALUE>
95	LIBC_INLINE constexpr NumberPair<T> split(T a) {
96	NumberPair<T> r{`0.0`, `0.0`};
97	// CN = 2^N.
98	constexpr T CN = static_cast<T>(`1` << N);
99	constexpr T C = CN + T(`1`);
100	T t1 = C * a;
101	T t2 = a - t1;
102	r.hi = t1 + t2;
103	r.lo = a - r.hi;
104	return r;
105	}
106
107	// Helper for non-fma exact mult where the first number is already split.
108	template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
109	LIBC_INLINE constexpr NumberPair<T> exact_mult(const NumberPair<T> &as, T a,
110	T b) {
111	NumberPair<T> bs = split<T, SPLIT_B>(b);
112	NumberPair<T> r{`0.0`, `0.0`};
113
114	r.hi = a * b;
115	T t1 = as.hi * bs.hi - r.hi;
116	T t2 = as.hi * bs.lo + t1;
117	T t3 = as.lo * bs.hi + t2;
118	r.lo = as.lo * bs.lo + t3;
119
120	return r;
121	}
122
123	// The templated exact multiplication needs template version of
124	// LIBC_TARGET_CPU_HAS_FMA_ macro to correctly select the implementation.*
125	// These can be moved to "src/__support/macros/properties/cpu_features.h" if
126	// other part of libc needed.
127	template <typename T> struct TargetHasFmaInstruction {
128	static constexpr bool VALUE = false;
129	};
130
131	#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
132	template <> struct TargetHasFmaInstruction<float> {
133	static constexpr bool VALUE = true;
134	};
135	#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
136
137	#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
138	template <> struct TargetHasFmaInstruction<double> {
139	static constexpr bool VALUE = true;
140	};
141	#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
142
143	// Note: When FMA instruction is not available, the `exact_mult` function is
144	// only correct for round-to-nearest mode. See:
145	// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
146	// Roundings," https://inria.hal.science/hal-04480440.
147	// Using Theorem 1 in the paper above, without FMA instruction, if we restrict
148	// the generated constants to precision <= 51, and splitting it by 2^28 + 1,
149	// then a b = r.hi + r.lo is exact for all rounding modes.*
150	template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
151	LIBC_INLINE LIBC_CONSTEXPR NumberPair<T> exact_mult(T a, T b) {
152	NumberPair<T> r{`0.0`, `0.0`};
153
154	if constexpr (TargetHasFmaInstruction<T>::VALUE) {
155	r.hi = a * b;
156	r.lo = fputil::multiply_add(a, b, -r.hi);
157	} else {
158	// Dekker's Product.
159	NumberPair<T> as = split(a);
160
161	r = exact_mult<T, SPLIT_B>(as, a, b);
162	}
163
164	return r;
165	}
166
167	template <typename T = double>
168	LIBC_INLINE NumberPair<T> quick_mult(T a, const NumberPair<T> &b) {
169	NumberPair<T> r = exact_mult(a, b.hi);
170	r.lo = multiply_add(a, b.lo, r.lo);
171	return r;
172	}
173
174	template <size_t SPLIT_B = `27`>
175	LIBC_INLINE constexpr DoubleDouble quick_mult(const DoubleDouble &a,
176	const DoubleDouble &b) {
177	DoubleDouble r = exact_mult<double, SPLIT_B>(a.hi, b.hi);
178	double t1 = multiply_add(x: a.hi, y: b.lo, z: r.lo);
179	double t2 = multiply_add(x: a.lo, y: b.hi, z: t1);
180	r.lo = t2;
181	return r;
182	}
183
184	// Assuming \|c\| >= \|a b\|.*
185	template <>
186	LIBC_INLINE DoubleDouble multiply_add<DoubleDouble>(const DoubleDouble &a,
187	const DoubleDouble &b,
188	const DoubleDouble &c) {
189	return add(a: c, b: quick_mult(a, b));
190	}
191
192	// Accurate double-double division, following Karp-Markstein's trick for
193	// division, implemented in the CORE-MATH project at:
194	// https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/tan/tan.c#L1855
195	//
196	// Error bounds:
197	// Let a = ah + al, b = bh + bl.
198	// Let r = rh + rl be the approximation of (ah + al) / (bh + bl).
199	// Then:
200	// (ah + al) / (bh + bl) - rh =
201	// = ((ah - bh rh) + (al - bl * rh)) / (bh + bl)*
202	// = (1 + O(bl/bh)) ((ah - bh * rh) + (al - bl * rh)) / bh*
203	// Let q = round(1/bh), then the above expressions are approximately:
204	// = (1 + O(bl / bh)) (1 + O(2^-52)) * q * ((ah - bh * rh) + (al - bl * rh))*
205	// So we can compute:
206	// rl = q (ah - bh * rh) + q * (al - bl * rh)*
207	// as accurate as possible, then the error is bounded by:
208	// \|(ah + al) / (bh + bl) - (rh + rl)\| < O(bl/bh) (2^-52 + al/ah + bl/bh)*
209	template <typename T>
210	LIBC_INLINE NumberPair<T> div(const NumberPair<T> &a, const NumberPair<T> &b) {
211	NumberPair<T> r;
212	T q = T(`1`) / b.hi;
213	r.hi = a.hi * q;
214
215	#ifdef LIBC_TARGET_CPU_HAS_FMA
216	T e_hi = fputil::multiply_add(b.hi, -r.hi, a.hi);
217	T e_lo = fputil::multiply_add(b.lo, -r.hi, a.lo);
218	#else
219	NumberPair<T> b_hi_r_hi = fputil::exact_mult(b.hi, -r.hi);
220	NumberPair<T> b_lo_r_hi = fputil::exact_mult(b.lo, -r.hi);
221	T e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
222	T e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
223	#endif // LIBC_TARGET_CPU_HAS_FMA
224
225	r.lo = q * (e_hi + e_lo);
226	return r;
227	}
228
229	} // namespace fputil
230	} // namespace LIBC_NAMESPACE_DECL
231
232	#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
233

Browse the source code of llvm_projects/libc/src/__support/FPUtil/double_double.h