1//===-- High Precision Decimal ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See httpss//llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// -----------------------------------------------------------------------------
10// **** WARNING ****
11// This file is shared with libc++. You should also be careful when adding
12// dependencies to this file, since it needs to build for all libc++ targets.
13// -----------------------------------------------------------------------------
14
15#ifndef LLVM_LIBC_SRC___SUPPORT_HIGH_PRECISION_DECIMAL_H
16#define LLVM_LIBC_SRC___SUPPORT_HIGH_PRECISION_DECIMAL_H
17
18#include "hdr/stdint_proxy.h"
19#include "src/__support/CPP/limits.h"
20#include "src/__support/ctype_utils.h"
21#include "src/__support/macros/config.h"
22#include "src/__support/str_to_integer.h"
23#include "src/__support/wctype_utils.h"
24
25namespace LIBC_NAMESPACE_DECL {
26namespace internal {
27
28struct LShiftTableEntry {
29 uint32_t new_digits;
30 char const *power_of_five;
31};
32
33// -----------------------------------------------------------------------------
34// **** WARNING ****
35// This interface is shared with libc++, if you change this interface you need
36// to update it in both libc and libc++.
37// -----------------------------------------------------------------------------
38// This is used in both this file and in the main str_to_float.h.
39// TODO: Figure out where to put this.
40enum class RoundDirection { Up, Down, Nearest };
41
42// These constants are used in both this file and in the main str_to_float.h.
43// TODO: Figure out where to put this.
44template <typename CharType> struct constants;
45template <> struct constants<char> {
46 static constexpr char DECIMAL_POINT = '.';
47 static constexpr char DECIMAL_EXPONENT_MARKER = 'e';
48 static constexpr char HEX_EXPONENT_MARKER = 'p';
49 static constexpr char INF_STRING[] = "infinity";
50 static constexpr char NAN_STRING[] = "nan";
51};
52template <> struct constants<wchar_t> {
53 static constexpr wchar_t DECIMAL_POINT = L'.';
54 static constexpr wchar_t DECIMAL_EXPONENT_MARKER = L'e';
55 static constexpr wchar_t HEX_EXPONENT_MARKER = L'p';
56 static constexpr wchar_t INF_STRING[] = L"infinity";
57 static constexpr wchar_t NAN_STRING[] = L"nan";
58};
59
60// This is based on the HPD data structure described as part of the Simple
61// Decimal Conversion algorithm by Nigel Tao, described at this link:
62// https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
63class HighPrecisionDecimal {
64
65 // This precomputed table speeds up left shifts by having the number of new
66 // digits that will be added by multiplying 5^i by 2^i. If the number is less
67 // than 5^i then it will add one fewer digit. There are only 60 entries since
68 // that's the max shift amount.
69 // This table was generated by the script at
70 // libc/utils/mathtools/GenerateHPDConstants.py
71 static constexpr LShiftTableEntry LEFT_SHIFT_DIGIT_TABLE[] = {
72 {.new_digits: 0, .power_of_five: ""},
73 {.new_digits: 1, .power_of_five: "5"},
74 {.new_digits: 1, .power_of_five: "25"},
75 {.new_digits: 1, .power_of_five: "125"},
76 {.new_digits: 2, .power_of_five: "625"},
77 {.new_digits: 2, .power_of_five: "3125"},
78 {.new_digits: 2, .power_of_five: "15625"},
79 {.new_digits: 3, .power_of_five: "78125"},
80 {.new_digits: 3, .power_of_five: "390625"},
81 {.new_digits: 3, .power_of_five: "1953125"},
82 {.new_digits: 4, .power_of_five: "9765625"},
83 {.new_digits: 4, .power_of_five: "48828125"},
84 {.new_digits: 4, .power_of_five: "244140625"},
85 {.new_digits: 4, .power_of_five: "1220703125"},
86 {.new_digits: 5, .power_of_five: "6103515625"},
87 {.new_digits: 5, .power_of_five: "30517578125"},
88 {.new_digits: 5, .power_of_five: "152587890625"},
89 {.new_digits: 6, .power_of_five: "762939453125"},
90 {.new_digits: 6, .power_of_five: "3814697265625"},
91 {.new_digits: 6, .power_of_five: "19073486328125"},
92 {.new_digits: 7, .power_of_five: "95367431640625"},
93 {.new_digits: 7, .power_of_five: "476837158203125"},
94 {.new_digits: 7, .power_of_five: "2384185791015625"},
95 {.new_digits: 7, .power_of_five: "11920928955078125"},
96 {.new_digits: 8, .power_of_five: "59604644775390625"},
97 {.new_digits: 8, .power_of_five: "298023223876953125"},
98 {.new_digits: 8, .power_of_five: "1490116119384765625"},
99 {.new_digits: 9, .power_of_five: "7450580596923828125"},
100 {.new_digits: 9, .power_of_five: "37252902984619140625"},
101 {.new_digits: 9, .power_of_five: "186264514923095703125"},
102 {.new_digits: 10, .power_of_five: "931322574615478515625"},
103 {.new_digits: 10, .power_of_five: "4656612873077392578125"},
104 {.new_digits: 10, .power_of_five: "23283064365386962890625"},
105 {.new_digits: 10, .power_of_five: "116415321826934814453125"},
106 {.new_digits: 11, .power_of_five: "582076609134674072265625"},
107 {.new_digits: 11, .power_of_five: "2910383045673370361328125"},
108 {.new_digits: 11, .power_of_five: "14551915228366851806640625"},
109 {.new_digits: 12, .power_of_five: "72759576141834259033203125"},
110 {.new_digits: 12, .power_of_five: "363797880709171295166015625"},
111 {.new_digits: 12, .power_of_five: "1818989403545856475830078125"},
112 {.new_digits: 13, .power_of_five: "9094947017729282379150390625"},
113 {.new_digits: 13, .power_of_five: "45474735088646411895751953125"},
114 {.new_digits: 13, .power_of_five: "227373675443232059478759765625"},
115 {.new_digits: 13, .power_of_five: "1136868377216160297393798828125"},
116 {.new_digits: 14, .power_of_five: "5684341886080801486968994140625"},
117 {.new_digits: 14, .power_of_five: "28421709430404007434844970703125"},
118 {.new_digits: 14, .power_of_five: "142108547152020037174224853515625"},
119 {.new_digits: 15, .power_of_five: "710542735760100185871124267578125"},
120 {.new_digits: 15, .power_of_five: "3552713678800500929355621337890625"},
121 {.new_digits: 15, .power_of_five: "17763568394002504646778106689453125"},
122 {.new_digits: 16, .power_of_five: "88817841970012523233890533447265625"},
123 {.new_digits: 16, .power_of_five: "444089209850062616169452667236328125"},
124 {.new_digits: 16, .power_of_five: "2220446049250313080847263336181640625"},
125 {.new_digits: 16, .power_of_five: "11102230246251565404236316680908203125"},
126 {.new_digits: 17, .power_of_five: "55511151231257827021181583404541015625"},
127 {.new_digits: 17, .power_of_five: "277555756156289135105907917022705078125"},
128 {.new_digits: 17, .power_of_five: "1387778780781445675529539585113525390625"},
129 {.new_digits: 18, .power_of_five: "6938893903907228377647697925567626953125"},
130 {.new_digits: 18, .power_of_five: "34694469519536141888238489627838134765625"},
131 {.new_digits: 18, .power_of_five: "173472347597680709441192448139190673828125"},
132 {.new_digits: 19, .power_of_five: "867361737988403547205962240695953369140625"},
133 };
134
135 // The maximum amount we can shift is the number of bits used in the
136 // accumulator, minus the number of bits needed to represent the base (in this
137 // case 4).
138 static constexpr uint32_t MAX_SHIFT_AMOUNT = sizeof(uint64_t) - 4;
139
140 // 800 is an arbitrary number of digits, but should be
141 // large enough for any practical number.
142 static constexpr uint32_t MAX_NUM_DIGITS = 800;
143
144 uint32_t num_digits = 0;
145 int32_t decimal_point = 0;
146 bool truncated = false;
147 uint8_t digits[MAX_NUM_DIGITS];
148
149private:
150 LIBC_INLINE bool should_round_up(int32_t round_to_digit,
151 RoundDirection round) {
152 if (round_to_digit < 0 ||
153 static_cast<uint32_t>(round_to_digit) >= this->num_digits) {
154 return false;
155 }
156
157 // The above condition handles all cases where all of the trailing digits
158 // are zero. In that case, if the rounding mode is up, then this number
159 // should be rounded up. Similarly, if the rounding mode is down, then it
160 // should always round down.
161 if (round == RoundDirection::Up) {
162 return true;
163 } else if (round == RoundDirection::Down) {
164 return false;
165 }
166 // Else round to nearest.
167
168 // If we're right in the middle and there are no extra digits
169 if (this->digits[round_to_digit] == 5 &&
170 static_cast<uint32_t>(round_to_digit + 1) == this->num_digits) {
171
172 // Round up if we've truncated (since that means the result is slightly
173 // higher than what's represented.)
174 if (this->truncated) {
175 return true;
176 }
177
178 // If this exactly halfway, round to even.
179 if (round_to_digit == 0)
180 // When the input is ".5".
181 return false;
182 return this->digits[round_to_digit - 1] % 2 != 0;
183 }
184 // If there are digits after round_to_digit, they must be non-zero since we
185 // trim trailing zeroes after all operations that change digits.
186 return this->digits[round_to_digit] >= 5;
187 }
188
189 // Takes an amount to left shift and returns the number of new digits needed
190 // to store the result based on LEFT_SHIFT_DIGIT_TABLE.
191 LIBC_INLINE uint32_t get_num_new_digits(uint32_t lshift_amount) {
192 const char *power_of_five =
193 LEFT_SHIFT_DIGIT_TABLE[lshift_amount].power_of_five;
194 uint32_t new_digits = LEFT_SHIFT_DIGIT_TABLE[lshift_amount].new_digits;
195 uint32_t digit_index = 0;
196 while (power_of_five[digit_index] != 0) {
197 if (digit_index >= this->num_digits) {
198 return new_digits - 1;
199 }
200 if (this->digits[digit_index] !=
201 internal::b36_char_to_int(ch: power_of_five[digit_index])) {
202 return new_digits -
203 ((this->digits[digit_index] <
204 internal::b36_char_to_int(ch: power_of_five[digit_index]))
205 ? 1
206 : 0);
207 }
208 ++digit_index;
209 }
210 return new_digits;
211 }
212
213 // Trim all trailing 0s
214 LIBC_INLINE void trim_trailing_zeroes() {
215 while (this->num_digits > 0 && this->digits[this->num_digits - 1] == 0) {
216 --this->num_digits;
217 }
218 if (this->num_digits == 0) {
219 this->decimal_point = 0;
220 }
221 }
222
223 // Perform a digitwise binary non-rounding right shift on this value by
224 // shift_amount. The shift_amount can't be more than MAX_SHIFT_AMOUNT to
225 // prevent overflow.
226 LIBC_INLINE void right_shift(uint32_t shift_amount) {
227 uint32_t read_index = 0;
228 uint32_t write_index = 0;
229
230 uint64_t accumulator = 0;
231
232 const uint64_t shift_mask = (uint64_t(1) << shift_amount) - 1;
233
234 // Warm Up phase: we don't have enough digits to start writing, so just
235 // read them into the accumulator.
236 while (accumulator >> shift_amount == 0) {
237 uint64_t read_digit = 0;
238 // If there are still digits to read, read the next one, else the digit is
239 // assumed to be 0.
240 if (read_index < this->num_digits) {
241 read_digit = this->digits[read_index];
242 }
243 accumulator = accumulator * 10 + read_digit;
244 ++read_index;
245 }
246
247 // Shift the decimal point by the number of digits it took to fill the
248 // accumulator.
249 this->decimal_point -= read_index - 1;
250
251 // Middle phase: we have enough digits to write, as well as more digits to
252 // read. Keep reading until we run out of digits.
253 while (read_index < this->num_digits) {
254 uint64_t read_digit = this->digits[read_index];
255 uint64_t write_digit = accumulator >> shift_amount;
256 accumulator &= shift_mask;
257 this->digits[write_index] = static_cast<uint8_t>(write_digit);
258 accumulator = accumulator * 10 + read_digit;
259 ++read_index;
260 ++write_index;
261 }
262
263 // Cool Down phase: All of the readable digits have been read, so just write
264 // the remainder, while treating any more digits as 0.
265 while (accumulator > 0) {
266 uint64_t write_digit = accumulator >> shift_amount;
267 accumulator &= shift_mask;
268 if (write_index < MAX_NUM_DIGITS) {
269 this->digits[write_index] = static_cast<uint8_t>(write_digit);
270 ++write_index;
271 } else if (write_digit > 0) {
272 this->truncated = true;
273 }
274 accumulator = accumulator * 10;
275 }
276 this->num_digits = write_index;
277 this->trim_trailing_zeroes();
278 }
279
280 // Perform a digitwise binary non-rounding left shift on this value by
281 // shift_amount. The shift_amount can't be more than MAX_SHIFT_AMOUNT to
282 // prevent overflow.
283 LIBC_INLINE void left_shift(uint32_t shift_amount) {
284 uint32_t new_digits = this->get_num_new_digits(lshift_amount: shift_amount);
285
286 int32_t read_index = static_cast<int32_t>(this->num_digits - 1);
287 uint32_t write_index = this->num_digits + new_digits;
288
289 uint64_t accumulator = 0;
290
291 // No Warm Up phase. Since we're putting digits in at the top and taking
292 // digits from the bottom we don't have to wait for the accumulator to fill.
293
294 // Middle phase: while we have more digits to read, keep reading as well as
295 // writing.
296 while (read_index >= 0) {
297 accumulator += static_cast<uint64_t>(this->digits[read_index])
298 << shift_amount;
299 uint64_t next_accumulator = accumulator / 10;
300 uint64_t write_digit = accumulator - (10 * next_accumulator);
301 --write_index;
302 if (write_index < MAX_NUM_DIGITS) {
303 this->digits[write_index] = static_cast<uint8_t>(write_digit);
304 } else if (write_digit != 0) {
305 this->truncated = true;
306 }
307 accumulator = next_accumulator;
308 --read_index;
309 }
310
311 // Cool Down phase: there are no more digits to read, so just write the
312 // remaining digits in the accumulator.
313 while (accumulator > 0) {
314 uint64_t next_accumulator = accumulator / 10;
315 uint64_t write_digit = accumulator - (10 * next_accumulator);
316 --write_index;
317 if (write_index < MAX_NUM_DIGITS) {
318 this->digits[write_index] = static_cast<uint8_t>(write_digit);
319 } else if (write_digit != 0) {
320 this->truncated = true;
321 }
322 accumulator = next_accumulator;
323 }
324
325 this->num_digits += new_digits;
326 if (this->num_digits > MAX_NUM_DIGITS) {
327 this->num_digits = MAX_NUM_DIGITS;
328 }
329 this->decimal_point += new_digits;
330 this->trim_trailing_zeroes();
331 }
332
333public:
334 // num_string is assumed to be a string of numeric characters. It doesn't
335 // handle leading spaces.
336 template <typename CharType>
337 LIBC_INLINE HighPrecisionDecimal(
338 const CharType *__restrict num_string,
339 const size_t num_len = cpp::numeric_limits<size_t>::max()) {
340 bool saw_dot = false;
341 size_t num_cur = 0;
342 // This counts the digits in the number, even if there isn't space to store
343 // them all.
344 uint32_t total_digits = 0;
345 while (num_cur < num_len &&
346 (isdigit(num_string[num_cur]) ||
347 num_string[num_cur] == constants<CharType>::DECIMAL_POINT)) {
348 if (num_string[num_cur] == constants<CharType>::DECIMAL_POINT) {
349 if (saw_dot) {
350 break;
351 }
352 this->decimal_point = static_cast<int32_t>(total_digits);
353 saw_dot = true;
354 } else {
355 int digit = b36_char_to_int(num_string[num_cur]);
356 if (digit == 0 && this->num_digits == 0) {
357 --this->decimal_point;
358 ++num_cur;
359 continue;
360 }
361 ++total_digits;
362 if (this->num_digits < MAX_NUM_DIGITS) {
363 this->digits[this->num_digits] = static_cast<uint8_t>(digit);
364 ++this->num_digits;
365 } else if (digit != 0) {
366 this->truncated = true;
367 }
368 }
369 ++num_cur;
370 }
371
372 if (!saw_dot)
373 this->decimal_point = static_cast<int32_t>(total_digits);
374
375 if (num_cur < num_len && tolower(num_string[num_cur]) ==
376 constants<CharType>::DECIMAL_EXPONENT_MARKER) {
377 ++num_cur;
378 if (isdigit(num_string[num_cur]) || get_sign(num_string + num_cur) != 0) {
379 auto result =
380 strtointeger<int32_t>(num_string + num_cur, 10, num_len - num_cur);
381 if (result.has_error()) {
382 // TODO: handle error
383 }
384 int32_t add_to_exponent = result.value;
385
386 // Here we do this operation as int64 to avoid overflow.
387 int64_t temp_exponent = static_cast<int64_t>(this->decimal_point) +
388 static_cast<int64_t>(add_to_exponent);
389
390 // Theoretically these numbers should be MAX_BIASED_EXPONENT for long
391 // double, but that should be ~16,000 which is much less than 1 << 30.
392 if (temp_exponent > (1 << 30)) {
393 temp_exponent = (1 << 30);
394 } else if (temp_exponent < -(1 << 30)) {
395 temp_exponent = -(1 << 30);
396 }
397 this->decimal_point = static_cast<int32_t>(temp_exponent);
398 }
399 }
400
401 this->trim_trailing_zeroes();
402 }
403
404 // Binary shift left (shift_amount > 0) or right (shift_amount < 0)
405 LIBC_INLINE void shift(int shift_amount) {
406 if (shift_amount == 0) {
407 return;
408 }
409 // Left
410 else if (shift_amount > 0) {
411 while (static_cast<uint32_t>(shift_amount) > MAX_SHIFT_AMOUNT) {
412 this->left_shift(shift_amount: MAX_SHIFT_AMOUNT);
413 shift_amount -= MAX_SHIFT_AMOUNT;
414 }
415 this->left_shift(shift_amount: static_cast<uint32_t>(shift_amount));
416 }
417 // Right
418 else {
419 while (static_cast<uint32_t>(shift_amount) < -MAX_SHIFT_AMOUNT) {
420 this->right_shift(shift_amount: MAX_SHIFT_AMOUNT);
421 shift_amount += MAX_SHIFT_AMOUNT;
422 }
423 this->right_shift(shift_amount: static_cast<uint32_t>(-shift_amount));
424 }
425 }
426
427 // Round the number represented to the closest value of unsigned int type T.
428 // This is done ignoring overflow.
429 template <class T>
430 LIBC_INLINE T
431 round_to_integer_type(RoundDirection round = RoundDirection::Nearest) {
432 T result = 0;
433 uint32_t cur_digit = 0;
434
435 while (static_cast<int32_t>(cur_digit) < this->decimal_point &&
436 cur_digit < this->num_digits) {
437 result = result * 10 + (this->digits[cur_digit]);
438 ++cur_digit;
439 }
440
441 // If there are implicit 0s at the end of the number, include those.
442 while (static_cast<int32_t>(cur_digit) < this->decimal_point) {
443 result *= 10;
444 ++cur_digit;
445 }
446 return result +
447 static_cast<T>(this->should_round_up(round_to_digit: this->decimal_point, round));
448 }
449
450 // Extra functions for testing.
451
452 LIBC_INLINE uint8_t *get_digits() { return this->digits; }
453 LIBC_INLINE uint32_t get_num_digits() { return this->num_digits; }
454 LIBC_INLINE int32_t get_decimal_point() { return this->decimal_point; }
455 LIBC_INLINE void set_truncated(bool trunc) { this->truncated = trunc; }
456};
457
458} // namespace internal
459} // namespace LIBC_NAMESPACE_DECL
460
461#endif // LLVM_LIBC_SRC___SUPPORT_HIGH_PRECISION_DECIMAL_H
462