//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Copyright 2018 Ulf Adams
// Copyright (c) Microsoft Corporation. All rights reserved.

// Boost Software License - Version 1.0 - August 17th, 2003

// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:

// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

#ifndef _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H
#define _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H

// Avoid formatting to keep the changes with the original code minimal.
// clang-format off

#include <__assert>
#include <__config>

#include "include/ryu/ryu.h"

_LIBCPP_BEGIN_NAMESPACE_STD

#if defined(_M_X64) && defined(_MSC_VER)
#define _LIBCPP_INTRINSIC128 1
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  return _umul128(__a, __b, __productHi);
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // For the __shiftright128 intrinsic, the shift value is always
  // modulo 64.
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In this case this function needs to be adjusted.
  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
  return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist));
}

// ^^^ intrinsics available ^^^ / vvv __int128 available vvv
#elif defined(__SIZEOF_INT128__) && ( \
    (defined(__clang__) && !defined(_MSC_VER)) || \
    (defined(__GNUC__) && !defined(__clang__) && !defined(__CUDACC__)))
#define _LIBCPP_INTRINSIC128 1
// We have unsigned __int128 support in Clang and GCC.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  auto __temp = __a * (unsigned __int128)__b;
  *__productHi = __temp >> 64;
  return static_cast<uint64_t>(__temp);
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In this case this function needs to be adjusted.
  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
  auto __temp = __lo | ((unsigned __int128)__hi << 64);
  // For x64 128-bit shifts using the `shrd` instruction and two 64-bit
  // registers, the shift value is modulo 64. Thus the `& 63` is free.
  return static_cast<uint64_t>(__temp >> (__dist & 63));
}
#else // ^^^ __int128 available ^^^ / vvv intrinsics unavailable vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  // TRANSITION, VSO-634761
  // The casts here help MSVC to avoid calls to the __allmul library function.
  const uint32_t __aLo = static_cast<uint32_t>(__a);
  const uint32_t __aHi = static_cast<uint32_t>(__a >> 32);
  const uint32_t __bLo = static_cast<uint32_t>(__b);
  const uint32_t __bHi = static_cast<uint32_t>(__b >> 32);

  const uint64_t __b00 = static_cast<uint64_t>(__aLo) * __bLo;
  const uint64_t __b01 = static_cast<uint64_t>(__aLo) * __bHi;
  const uint64_t __b10 = static_cast<uint64_t>(__aHi) * __bLo;
  const uint64_t __b11 = static_cast<uint64_t>(__aHi) * __bHi;
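
  // Combine the partial products: with __a == (__aHi << 32) | __aLo and
  // __b == (__bHi << 32) | __bLo,
  //   __a * __b == (__b11 << 64) + ((__b01 + __b10) << 32) + __b00.
  // __mid1 and __mid2 accumulate the two middle terms plus the upper 32 bits
  // of __b00; their upper halves (__mid1Hi, __mid2Hi) are the carries that
  // propagate into the high 64 bits of the product.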
  const uint32_t __b00Lo = static_cast<uint32_t>(__b00);
  const uint32_t __b00Hi = static_cast<uint32_t>(__b00 >> 32);

  const uint64_t __mid1 = __b10 + __b00Hi;
  const uint32_t __mid1Lo = static_cast<uint32_t>(__mid1);
  const uint32_t __mid1Hi = static_cast<uint32_t>(__mid1 >> 32);

  const uint64_t __mid2 = __b01 + __mid1Lo;
  const uint32_t __mid2Lo = static_cast<uint32_t>(__mid2);
  const uint32_t __mid2Hi = static_cast<uint32_t>(__mid2 >> 32);

  const uint64_t __pHi = __b11 + __mid1Hi + __mid2Hi;
  const uint64_t __pLo = (static_cast<uint64_t>(__mid2Lo) << 32) | __b00Lo;

  *__productHi = __pHi;
  return __pLo;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // We don't need to handle the case __dist >= 64 here (see above).
  _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
#ifdef _LIBCPP_64_BIT
  _LIBCPP_ASSERT_INTERNAL(__dist > 0, "");
  return (__hi << (64 - __dist)) | (__lo >> __dist);
#else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
  // Avoid a 64-bit shift by taking advantage of the range of shift values.
  _LIBCPP_ASSERT_INTERNAL(__dist >= 32, "");
  return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32));
#endif // ^^^ 32-bit ^^^
}

#endif // ^^^ intrinsics unavailable ^^^

#ifndef _LIBCPP_64_BIT

// Returns the high 64 bits of the 128-bit product of __a and __b.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umulh(const uint64_t __a, const uint64_t __b) {
  // Reuse the __ryu_umul128 implementation.
  // Optimizers will likely eliminate the instructions used to compute the
  // low part of the product.
  uint64_t __hi;
  (void) __ryu_umul128(__a, __b, &__hi);
  return __hi;
}

// On 32-bit platforms, compilers typically generate calls to library
// functions for 64-bit divisions, even if the divisor is a constant.
//
// TRANSITION, LLVM-37932
//
// The functions here perform division-by-constant using multiplications
// in the same way as 64-bit compilers would do.
//
// NB:
// The multipliers and shift values are the ones generated by clang x64
// for expressions like x/5, x/10, etc.
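//
// For example, 0xCCCCCCCCCCCCCCCD == ceil(2^66 / 5), and for every uint64_t __x
//   __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2 == floor(__x * ceil(2^66 / 5) / 2^66) == __x / 5;
// shifting by 3 instead of 2 divides by 10. __div100 and __div1e9 first shift
// out the powers of two of their divisors (100 == 4 * 25, 10^9 == 512 * 1953125)
// before applying the same multiply-and-shift scheme.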

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __umulh(__x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __umulh(__x, 0xABCC77118461CEFDu) >> 26;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __umulh(__x >> 9, 0x44B82FA09B5A53u) >> 11;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  // Avoid 64-bit math as much as possible.
  // Returning static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)) would
  // perform a 32x64-bit multiplication and a 64-bit subtraction.
  // __x and 1000000000 * __div1e9(__x) are guaranteed to differ by
  // less than 10^9, so the difference fits in 32 bits and the subtraction
  // can be done modulo 2^32; we can therefore truncate both operands to
  // uint32_t before subtracting.
  // We can also simplify static_cast<uint32_t>(1000000000 * __div1e9(__x)):
  // we can truncate before multiplying instead of after, as multiplying
  // the highest 32 bits of __div1e9(__x) can't affect the lowest 32 bits.
  return static_cast<uint32_t>(__x) - 1000000000 * static_cast<uint32_t>(__div1e9(__x));
}

#else // ^^^ 32-bit ^^^ / vvv 64-bit vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __x / 5;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __x / 10;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __x / 100;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __x / 100000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __x / 1000000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  return static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x));
}

#endif // ^^^ 64-bit ^^^
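
// Returns the number of times __value is evenly divisible by 5
// (i.e. the exponent of 5 in its factorization). __value must not be 0.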
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) {
  uint32_t __count = 0;
  for (;;) {
    _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
    const uint64_t __q = __div5(__value);
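    // __value - 5 * __q is in [0, 4], so truncating both operands to 32 bits
    // before subtracting still yields the exact remainder and keeps the
    // subtraction in 32-bit arithmetic.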
    const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q);
    if (__r != 0) {
      break;
    }
    __value = __q;
    ++__count;
  }
  return __count;
}

// Returns true if __value is divisible by 5^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint64_t __value, const uint32_t __p) {
  // I tried a case distinction on __p, but there was no performance difference.
  return __pow5Factor(__value) >= __p;
}

// Returns true if __value is divisible by 2^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
  _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
  _LIBCPP_ASSERT_INTERNAL(__p < 64, "");
  // __builtin_ctzll doesn't appear to be faster here.
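  // __value is a multiple of 2^__p if and only if its low __p bits are all
  // zero; the mask (1ull << __p) - 1 selects exactly those bits.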
  return (__value & ((1ull << __p) - 1)) == 0;
}

_LIBCPP_END_NAMESPACE_STD

// clang-format on

#endif // _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H