1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/FloatingPointMode.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/ADT/StringSwitch.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/Error.h"
27#include "llvm/Support/MathExtras.h"
28#include "llvm/Support/raw_ostream.h"
29#include <cstring>
30#include <limits.h>
31
32/// Shared headers from LLVM libc
33/// Make sure to add ${LLVM_SOURCE_DIR}/../libc to include directories.
34///
35/// Notes: So far it looks like APFloat does not check errnos or floating-point
36/// exceptions after calling the math functions, so we will configure LLVM libc
37/// math functions to skip setting errnos and floating-point exceptions
38/// explicitly. We also put them in a separate namespace so that the symbols
39/// do not clash with other libc math builds just in case.
40#define LIBC_NAMESPACE __llvm_libc_apfloat
41#define LIBC_MATH (LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
42
43#include "shared/math.h"
44#include "shared/math_check_exceptions.h"
45
46#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
47 do { \
48 if (usesLayout<IEEEFloat>(getSemantics())) \
49 return U.IEEE.METHOD_CALL; \
50 if (usesLayout<DoubleAPFloat>(getSemantics())) \
51 return U.Double.METHOD_CALL; \
52 llvm_unreachable("Unexpected semantics"); \
53 } while (false)
54
55using namespace llvm;
56
57/// A macro used to combine two fcCategory enums into one key which can be used
58/// in a switch statement to classify how the interaction of two APFloat's
59/// categories affects an operation.
60///
61/// TODO: If clang source code is ever allowed to use constexpr in its own
62/// codebase, change this into a static inline function.
63#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
64
65/* Assumed in hexadecimal significand parsing, and conversion to
66 hexadecimal strings. */
67static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
68
69namespace llvm {
70
71constexpr fltSemantics APFloatBase::semIEEEhalf = {.maxExponent: 15, .minExponent: -14, .precision: 11, .sizeInBits: 16};
72constexpr fltSemantics APFloatBase::semBFloat = {.maxExponent: 127, .minExponent: -126, .precision: 8, .sizeInBits: 16};
73constexpr fltSemantics APFloatBase::semIEEEsingle = {.maxExponent: 127, .minExponent: -126, .precision: 24, .sizeInBits: 32};
74constexpr fltSemantics APFloatBase::semIEEEdouble = {.maxExponent: 1023, .minExponent: -1022, .precision: 53, .sizeInBits: 64};
75constexpr fltSemantics APFloatBase::semIEEEquad = {.maxExponent: 16383, .minExponent: -16382, .precision: 113, .sizeInBits: 128};
76constexpr fltSemantics APFloatBase::semFloat8E5M2 = {.maxExponent: 15, .minExponent: -14, .precision: 3, .sizeInBits: 8};
77constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
78 .maxExponent: 15, .minExponent: -15, .precision: 3, .sizeInBits: 8, .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly, .nanEncoding: fltNanEncoding::NegativeZero};
79constexpr fltSemantics APFloatBase::semFloat8E4M3 = {.maxExponent: 7, .minExponent: -6, .precision: 4, .sizeInBits: 8};
80constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
81 .maxExponent: 8, .minExponent: -6, .precision: 4, .sizeInBits: 8, .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly, .nanEncoding: fltNanEncoding::AllOnes};
82constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
83 .maxExponent: 7, .minExponent: -7, .precision: 4, .sizeInBits: 8, .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly, .nanEncoding: fltNanEncoding::NegativeZero};
84constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
85 .maxExponent: 4, .minExponent: -10, .precision: 4, .sizeInBits: 8, .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly, .nanEncoding: fltNanEncoding::NegativeZero};
86constexpr fltSemantics APFloatBase::semFloat8E3M4 = {.maxExponent: 3, .minExponent: -2, .precision: 5, .sizeInBits: 8};
87constexpr fltSemantics APFloatBase::semFloatTF32 = {.maxExponent: 127, .minExponent: -126, .precision: 11, .sizeInBits: 19};
88constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
89 .maxExponent: 127,
90 .minExponent: -127,
91 .precision: 1,
92 .sizeInBits: 8,
93 .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly,
94 .nanEncoding: fltNanEncoding::AllOnes,
95 .hasZero: false,
96 .hasSignedRepr: false,
97 .hasSignBitInMSB: false,
98 .hasDenormals: false};
99
100constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
101 .maxExponent: 4, .minExponent: -2, .precision: 3, .sizeInBits: 6, .nonFiniteBehavior: fltNonfiniteBehavior::FiniteOnly};
102constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
103 .maxExponent: 2, .minExponent: 0, .precision: 4, .sizeInBits: 6, .nonFiniteBehavior: fltNonfiniteBehavior::FiniteOnly};
104constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
105 .maxExponent: 2, .minExponent: 0, .precision: 2, .sizeInBits: 4, .nonFiniteBehavior: fltNonfiniteBehavior::FiniteOnly};
106constexpr fltSemantics APFloatBase::semX87DoubleExtended = {.maxExponent: 16383, .minExponent: -16382, .precision: 64,
107 .sizeInBits: 80};
108constexpr fltSemantics APFloatBase::semBogus = {.maxExponent: 0, .minExponent: 0, .precision: 0, .sizeInBits: 0};
109constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {.maxExponent: -1, .minExponent: 0, .precision: 0, .sizeInBits: 128};
110constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
111 .maxExponent: 1023, .minExponent: -1022 + 53, .precision: 53 + 53, .sizeInBits: 128};
112
113const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
114 switch (S) {
115 case S_IEEEhalf:
116 return IEEEhalf();
117 case S_BFloat:
118 return BFloat();
119 case S_IEEEsingle:
120 return IEEEsingle();
121 case S_IEEEdouble:
122 return IEEEdouble();
123 case S_IEEEquad:
124 return IEEEquad();
125 case S_PPCDoubleDouble:
126 return PPCDoubleDouble();
127 case S_PPCDoubleDoubleLegacy:
128 return PPCDoubleDoubleLegacy();
129 case S_Float8E5M2:
130 return Float8E5M2();
131 case S_Float8E5M2FNUZ:
132 return Float8E5M2FNUZ();
133 case S_Float8E4M3:
134 return Float8E4M3();
135 case S_Float8E4M3FN:
136 return Float8E4M3FN();
137 case S_Float8E4M3FNUZ:
138 return Float8E4M3FNUZ();
139 case S_Float8E4M3B11FNUZ:
140 return Float8E4M3B11FNUZ();
141 case S_Float8E3M4:
142 return Float8E3M4();
143 case S_FloatTF32:
144 return FloatTF32();
145 case S_Float8E8M0FNU:
146 return Float8E8M0FNU();
147 case S_Float6E3M2FN:
148 return Float6E3M2FN();
149 case S_Float6E2M3FN:
150 return Float6E2M3FN();
151 case S_Float4E2M1FN:
152 return Float4E2M1FN();
153 case S_x87DoubleExtended:
154 return x87DoubleExtended();
155 }
156 llvm_unreachable("Unrecognised floating semantics");
157}
158
159APFloatBase::Semantics
160APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
161 if (&Sem == &llvm::APFloat::IEEEhalf())
162 return S_IEEEhalf;
163 else if (&Sem == &llvm::APFloat::BFloat())
164 return S_BFloat;
165 else if (&Sem == &llvm::APFloat::IEEEsingle())
166 return S_IEEEsingle;
167 else if (&Sem == &llvm::APFloat::IEEEdouble())
168 return S_IEEEdouble;
169 else if (&Sem == &llvm::APFloat::IEEEquad())
170 return S_IEEEquad;
171 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
172 return S_PPCDoubleDouble;
173 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
174 return S_PPCDoubleDoubleLegacy;
175 else if (&Sem == &llvm::APFloat::Float8E5M2())
176 return S_Float8E5M2;
177 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
178 return S_Float8E5M2FNUZ;
179 else if (&Sem == &llvm::APFloat::Float8E4M3())
180 return S_Float8E4M3;
181 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
182 return S_Float8E4M3FN;
183 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
184 return S_Float8E4M3FNUZ;
185 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
186 return S_Float8E4M3B11FNUZ;
187 else if (&Sem == &llvm::APFloat::Float8E3M4())
188 return S_Float8E3M4;
189 else if (&Sem == &llvm::APFloat::FloatTF32())
190 return S_FloatTF32;
191 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
192 return S_Float8E8M0FNU;
193 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
194 return S_Float6E3M2FN;
195 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
196 return S_Float6E2M3FN;
197 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
198 return S_Float4E2M1FN;
199 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
200 return S_x87DoubleExtended;
201 else
202 llvm_unreachable("Unknown floating semantics");
203}
204
205bool APFloatBase::isRepresentableBy(const fltSemantics &A,
206 const fltSemantics &B) {
207 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
208 A.precision <= B.precision;
209}
210
211/* A tight upper bound on number of parts required to hold the value
212 pow(5, power) is
213
214 power * 815 / (351 * integerPartWidth) + 1
215
216 However, whilst the result may require only this many parts,
217 because we are multiplying two values to get it, the
218 multiplication may require an extra part with the excess part
219 being zero (consider the trivial case of 1 * 1, tcFullMultiply
220 requires two parts to hold the single-part result). So we add an
221 extra one to guarantee enough space whilst multiplying. */
222const unsigned int maxExponent = 16383;
223const unsigned int maxPrecision = 113;
224const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
225const unsigned int maxPowerOfFiveParts =
226 2 +
227 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
228
229unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
230 return semantics.precision;
231}
232APFloatBase::ExponentType
233APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
234 return semantics.maxExponent;
235}
236APFloatBase::ExponentType
237APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
238 return semantics.minExponent;
239}
240unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
241 return semantics.sizeInBits;
242}
243unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
244 bool isSigned) {
245 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
246 // at least one more bit than the MaxExponent to hold the max FP value.
247 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
248 // Extra sign bit needed.
249 if (isSigned)
250 ++MinBitWidth;
251 return MinBitWidth;
252}
253
254bool APFloatBase::semanticsHasZero(const fltSemantics &semantics) {
255 return semantics.hasZero;
256}
257
258bool APFloatBase::semanticsHasSignedRepr(const fltSemantics &semantics) {
259 return semantics.hasSignedRepr;
260}
261
262bool APFloatBase::semanticsHasInf(const fltSemantics &semantics) {
263 return semantics.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754;
264}
265
266bool APFloatBase::semanticsHasNaN(const fltSemantics &semantics) {
267 return semantics.nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly;
268}
269
270bool APFloatBase::isIEEELikeFP(const fltSemantics &semantics) {
271 // Keep in sync with Type::isIEEELikeFPTy
272 return SemanticsToEnum(Sem: semantics) <= S_IEEEquad;
273}
274
275bool APFloatBase::hasSignBitInMSB(const fltSemantics &semantics) {
276 return semantics.hasSignBitInMSB;
277}
278
279bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
280 const fltSemantics &Dst) {
281 // Exponent range must be larger.
282 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
283 return false;
284
285 // If the mantissa is long enough, the result value could still be denormal
286 // with a larger exponent range.
287 //
288 // FIXME: This condition is probably not accurate but also shouldn't be a
289 // practical concern with existing types.
290 return Dst.precision >= Src.precision;
291}
292
293unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
294 return Sem.sizeInBits;
295}
296
297static constexpr APFloatBase::ExponentType
298exponentZero(const fltSemantics &semantics) {
299 return semantics.minExponent - 1;
300}
301
302static constexpr APFloatBase::ExponentType
303exponentInf(const fltSemantics &semantics) {
304 return semantics.maxExponent + 1;
305}
306
307static constexpr APFloatBase::ExponentType
308exponentNaN(const fltSemantics &semantics) {
309 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
310 if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
311 return exponentZero(semantics);
312 if (semantics.hasSignedRepr)
313 return semantics.maxExponent;
314 }
315 return semantics.maxExponent + 1;
316}
317
318/* A bunch of private, handy routines. */
319
320static inline Error createError(const Twine &Err) {
321 return make_error<StringError>(Args: Err, Args: inconvertibleErrorCode());
322}
323
324static constexpr inline unsigned int partCountForBits(unsigned int bits) {
325 return std::max(a: 1u, b: (bits + APFloatBase::integerPartWidth - 1) /
326 APFloatBase::integerPartWidth);
327}
328
329/* Returns 0U-9U. Return values >= 10U are not digits. */
330static inline unsigned int
331decDigitValue(unsigned int c)
332{
333 return c - '0';
334}
335
336/* Return the value of a decimal exponent of the form
337 [+-]ddddddd.
338
339 If the exponent overflows, returns a large exponent with the
340 appropriate sign. */
341static Expected<int> readExponent(StringRef::iterator begin,
342 StringRef::iterator end) {
343 const unsigned int overlargeExponent = 24000; /* FIXME. */
344 StringRef::iterator p = begin;
345
346 // Treat no exponent as 0 to match binutils
347 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end))
348 return 0;
349
350 bool isNegative = *p == '-';
351 if (*p == '-' || *p == '+') {
352 p++;
353 if (p == end)
354 return createError(Err: "Exponent has no digits");
355 }
356
357 unsigned absExponent = decDigitValue(c: *p++);
358 if (absExponent >= 10U)
359 return createError(Err: "Invalid character in exponent");
360
361 for (; p != end; ++p) {
362 unsigned value = decDigitValue(c: *p);
363 if (value >= 10U)
364 return createError(Err: "Invalid character in exponent");
365
366 absExponent = absExponent * 10U + value;
367 if (absExponent >= overlargeExponent) {
368 absExponent = overlargeExponent;
369 break;
370 }
371 }
372
373 if (isNegative)
374 return -(int) absExponent;
375 else
376 return (int) absExponent;
377}
378
379/* This is ugly and needs cleaning up, but I don't immediately see
380 how whilst remaining safe. */
381static Expected<int> totalExponent(StringRef::iterator p,
382 StringRef::iterator end,
383 int exponentAdjustment) {
384 int exponent = 0;
385
386 if (p == end)
387 return createError(Err: "Exponent has no digits");
388
389 bool negative = *p == '-';
390 if (*p == '-' || *p == '+') {
391 p++;
392 if (p == end)
393 return createError(Err: "Exponent has no digits");
394 }
395
396 int unsignedExponent = 0;
397 bool overflow = false;
398 for (; p != end; ++p) {
399 unsigned int value;
400
401 value = decDigitValue(c: *p);
402 if (value >= 10U)
403 return createError(Err: "Invalid character in exponent");
404
405 unsignedExponent = unsignedExponent * 10 + value;
406 if (unsignedExponent > 32767) {
407 overflow = true;
408 break;
409 }
410 }
411
412 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
413 overflow = true;
414
415 if (!overflow) {
416 exponent = unsignedExponent;
417 if (negative)
418 exponent = -exponent;
419 exponent += exponentAdjustment;
420 if (exponent > 32767 || exponent < -32768)
421 overflow = true;
422 }
423
424 if (overflow)
425 exponent = negative ? -32768: 32767;
426
427 return exponent;
428}
429
430static Expected<StringRef::iterator>
431skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
432 StringRef::iterator *dot) {
433 StringRef::iterator p = begin;
434 *dot = end;
435 while (p != end && *p == '0')
436 p++;
437
438 if (p != end && *p == '.') {
439 *dot = p++;
440
441 if (end - begin == 1)
442 return createError(Err: "Significand has no digits");
443
444 while (p != end && *p == '0')
445 p++;
446 }
447
448 return p;
449}
450
451/* Given a normal decimal floating point number of the form
452
453 dddd.dddd[eE][+-]ddd
454
455 where the decimal point and exponent are optional, fill out the
456 structure D. Exponent is appropriate if the significand is
457 treated as an integer, and normalizedExponent if the significand
458 is taken to have the decimal point after a single leading
459 non-zero digit.
460
461 If the value is zero, V->firstSigDigit points to a non-digit, and
462 the return exponent is zero.
463*/
464struct decimalInfo {
465 const char *firstSigDigit;
466 const char *lastSigDigit;
467 int exponent;
468 int normalizedExponent;
469};
470
471static Error interpretDecimal(StringRef::iterator begin,
472 StringRef::iterator end, decimalInfo *D) {
473 StringRef::iterator dot = end;
474
475 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, dot: &dot);
476 if (!PtrOrErr)
477 return PtrOrErr.takeError();
478 StringRef::iterator p = *PtrOrErr;
479
480 D->firstSigDigit = p;
481 D->exponent = 0;
482 D->normalizedExponent = 0;
483
484 for (; p != end; ++p) {
485 if (*p == '.') {
486 if (dot != end)
487 return createError(Err: "String contains multiple dots");
488 dot = p++;
489 if (p == end)
490 break;
491 }
492 if (decDigitValue(c: *p) >= 10U)
493 break;
494 }
495
496 if (p != end) {
497 if (*p != 'e' && *p != 'E')
498 return createError(Err: "Invalid character in significand");
499 if (p == begin)
500 return createError(Err: "Significand has no digits");
501 if (dot != end && p - begin == 1)
502 return createError(Err: "Significand has no digits");
503
504 /* p points to the first non-digit in the string */
505 auto ExpOrErr = readExponent(begin: p + 1, end);
506 if (!ExpOrErr)
507 return ExpOrErr.takeError();
508 D->exponent = *ExpOrErr;
509
510 /* Implied decimal point? */
511 if (dot == end)
512 dot = p;
513 }
514
515 /* If number is all zeroes accept any exponent. */
516 if (p != D->firstSigDigit) {
517 /* Drop insignificant trailing zeroes. */
518 if (p != begin) {
519 do
520 do
521 p--;
522 while (p != begin && *p == '0');
523 while (p != begin && *p == '.');
524 }
525
526 /* Adjust the exponents for any decimal point. */
527 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
528 D->normalizedExponent = (D->exponent +
529 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
530 - (dot > D->firstSigDigit && dot < p)));
531 }
532
533 D->lastSigDigit = p;
534 return Error::success();
535}
536
537/* Return the trailing fraction of a hexadecimal number.
538 DIGITVALUE is the first hex digit of the fraction, P points to
539 the next digit. */
540static Expected<lostFraction>
541trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
542 unsigned int digitValue) {
543 /* If the first trailing digit isn't 0 or 8 we can work out the
544 fraction immediately. */
545 if (digitValue > 8)
546 return lfMoreThanHalf;
547 else if (digitValue < 8 && digitValue > 0)
548 return lfLessThanHalf;
549
550 // Otherwise we need to find the first non-zero digit.
551 while (p != end && (*p == '0' || *p == '.'))
552 p++;
553
554 if (p == end)
555 return createError(Err: "Invalid trailing hexadecimal fraction!");
556
557 unsigned hexDigit = hexDigitValue(C: *p);
558
559 /* If we ran off the end it is exactly zero or one-half, otherwise
560 a little more. */
561 if (hexDigit == UINT_MAX)
562 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
563 else
564 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
565}
566
567/* Return the fraction lost were a bignum truncated losing the least
568 significant BITS bits. */
569static lostFraction
570lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
571 unsigned int partCount,
572 unsigned int bits)
573{
574 unsigned lsb = APInt::tcLSB(parts, n: partCount);
575
576 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
577 if (bits <= lsb)
578 return lfExactlyZero;
579 if (bits == lsb + 1)
580 return lfExactlyHalf;
581 if (bits <= partCount * APFloatBase::integerPartWidth &&
582 APInt::tcExtractBit(parts, bit: bits - 1))
583 return lfMoreThanHalf;
584
585 return lfLessThanHalf;
586}
587
588/* Shift DST right BITS bits noting lost fraction. */
589static lostFraction
590shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
591{
592 lostFraction lost_fraction = lostFractionThroughTruncation(parts: dst, partCount: parts, bits);
593
594 APInt::tcShiftRight(dst, Words: parts, Count: bits);
595
596 return lost_fraction;
597}
598
599/* Combine the effect of two lost fractions. */
600static lostFraction
601combineLostFractions(lostFraction moreSignificant,
602 lostFraction lessSignificant)
603{
604 if (lessSignificant != lfExactlyZero) {
605 if (moreSignificant == lfExactlyZero)
606 moreSignificant = lfLessThanHalf;
607 else if (moreSignificant == lfExactlyHalf)
608 moreSignificant = lfMoreThanHalf;
609 }
610
611 return moreSignificant;
612}
613
614/* The error from the true value, in half-ulps, on multiplying two
615 floating point numbers, which differ from the value they
616 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
617 than the returned value.
618
619 See "How to Read Floating Point Numbers Accurately" by William D
620 Clinger. */
621static unsigned int
622HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
623{
624 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
625
626 if (HUerr1 + HUerr2 == 0)
627 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
628 else
629 return inexactMultiply + 2 * (HUerr1 + HUerr2);
630}
631
632/* The number of ulps from the boundary (zero, or half if ISNEAREST)
633 when the least significant BITS are truncated. BITS cannot be
634 zero. */
635static APFloatBase::integerPart
636ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
637 bool isNearest) {
638 assert(bits != 0);
639
640 bits--;
641 unsigned count = bits / APFloatBase::integerPartWidth;
642 unsigned partBits = bits % APFloatBase::integerPartWidth + 1;
643
644 APFloatBase::integerPart part =
645 parts[count] & (~(APFloatBase::integerPart)0 >>
646 (APFloatBase::integerPartWidth - partBits));
647
648 APFloatBase::integerPart boundary;
649 if (isNearest)
650 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
651 else
652 boundary = 0;
653
654 if (count == 0) {
655 if (part - boundary <= boundary - part)
656 return part - boundary;
657 else
658 return boundary - part;
659 }
660
661 if (part == boundary) {
662 while (--count)
663 if (parts[count])
664 return ~(APFloatBase::integerPart) 0; /* A lot. */
665
666 return parts[0];
667 } else if (part == boundary - 1) {
668 while (--count)
669 if (~parts[count])
670 return ~(APFloatBase::integerPart) 0; /* A lot. */
671
672 return -parts[0];
673 }
674
675 return ~(APFloatBase::integerPart) 0; /* A lot. */
676}
677
678/* Place pow(5, power) in DST, and return the number of parts used.
679 DST must be at least one part larger than size of the answer. */
680static unsigned int
681powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
682 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
683 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
684 pow5s[0] = 78125 * 5;
685
686 unsigned int partsCount = 1;
687 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
688 assert(power <= maxExponent);
689
690 p1 = dst;
691 p2 = scratch;
692
693 *p1 = firstEightPowers[power & 7];
694 power >>= 3;
695
696 unsigned result = 1;
697 pow5 = pow5s;
698
699 for (unsigned int n = 0; power; power >>= 1, n++) {
700 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
701 if (n != 0) {
702 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
703 partsCount, partsCount);
704 partsCount *= 2;
705 if (pow5[partsCount - 1] == 0)
706 partsCount--;
707 }
708
709 if (power & 1) {
710 APFloatBase::integerPart *tmp;
711
712 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
713 result += partsCount;
714 if (p2[result - 1] == 0)
715 result--;
716
717 /* Now result is in p1 with partsCount parts and p2 is scratch
718 space. */
719 tmp = p1;
720 p1 = p2;
721 p2 = tmp;
722 }
723
724 pow5 += partsCount;
725 }
726
727 if (p1 != dst)
728 APInt::tcAssign(dst, p1, result);
729
730 return result;
731}
732
733/* Zero at the end to avoid modular arithmetic when adding one; used
734 when rounding up during hexadecimal output. */
735static const char hexDigitsLower[] = "0123456789abcdef0";
736static const char hexDigitsUpper[] = "0123456789ABCDEF0";
737static const char infinityL[] = "infinity";
738static const char infinityU[] = "INFINITY";
739static const char NaNL[] = "nan";
740static const char NaNU[] = "NAN";
741
742/* Write out an integerPart in hexadecimal, starting with the most
743 significant nibble. Write out exactly COUNT hexdigits, return
744 COUNT. */
745static unsigned int
746partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
747 const char *hexDigitChars)
748{
749 unsigned int result = count;
750
751 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
752
753 part >>= (APFloatBase::integerPartWidth - 4 * count);
754 while (count--) {
755 dst[count] = hexDigitChars[part & 0xf];
756 part >>= 4;
757 }
758
759 return result;
760}
761
762/* Write out an unsigned decimal integer. */
763static char *writeUnsignedDecimal(char *dst, unsigned int n) {
764 char buff[40], *p;
765
766 p = buff;
767 do
768 *p++ = '0' + n % 10;
769 while (n /= 10);
770
771 do
772 *dst++ = *--p;
773 while (p != buff);
774
775 return dst;
776}
777
778/* Write out a signed decimal integer. */
779static char *writeSignedDecimal(char *dst, int value) {
780 if (value < 0) {
781 *dst++ = '-';
782 dst = writeUnsignedDecimal(dst, n: -(unsigned) value);
783 } else {
784 dst = writeUnsignedDecimal(dst, n: value);
785 }
786
787 return dst;
788}
789
790// Compute the ULP of the input using a definition from:
791// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
792// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
793static APFloat harrisonUlp(const APFloat &X) {
794 const fltSemantics &Sem = X.getSemantics();
795 switch (X.getCategory()) {
796 case APFloat::fcNaN:
797 return APFloat::getQNaN(Sem);
798 case APFloat::fcInfinity:
799 return APFloat::getInf(Sem);
800 case APFloat::fcZero:
801 return APFloat::getSmallest(Sem);
802 case APFloat::fcNormal:
803 break;
804 }
805 if (X.isDenormal() || X.isSmallestNormalized())
806 return APFloat::getSmallest(Sem);
807 int Exp = ilogb(Arg: X);
808 if (X.getExactLog2() != INT_MIN)
809 Exp -= 1;
810 return scalbn(X: APFloat::getOne(Sem), Exp: Exp - (Sem.precision - 1),
811 RM: APFloat::rmNearestTiesToEven);
812}
813
814namespace detail {
815/* Constructors. */
816void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
817 semantics = ourSemantics;
818 unsigned count = partCount();
819 if (count > 1)
820 significand.parts = new integerPart[count];
821}
822
823void IEEEFloat::freeSignificand() {
824 if (needsCleanup())
825 delete [] significand.parts;
826}
827
828void IEEEFloat::assign(const IEEEFloat &rhs) {
829 assert(semantics == rhs.semantics);
830
831 sign = rhs.sign;
832 category = rhs.category;
833 exponent = rhs.exponent;
834 if (isFiniteNonZero() || category == fcNaN)
835 copySignificand(rhs);
836}
837
838void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
839 assert(isFiniteNonZero() || category == fcNaN);
840 assert(rhs.partCount() >= partCount());
841
842 APInt::tcAssign(significandParts(), rhs.significandParts(),
843 partCount());
844}
845
846/* Make this number a NaN, with an arbitrary but deterministic value
847 for the significand. If double or longer, this is a signalling NaN,
848 which may not be ideal. If float, this is QNaN(0). */
849void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
850 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
851 llvm_unreachable("This floating point format does not support NaN");
852
853 if (Negative && !semantics->hasSignedRepr)
854 llvm_unreachable(
855 "This floating point format does not support signed values");
856
857 category = fcNaN;
858 sign = Negative;
859 exponent = exponentNaN();
860
861 integerPart *significand = significandParts();
862 unsigned numParts = partCount();
863
864 APInt fill_storage;
865 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
866 // Finite-only types do not distinguish signalling and quiet NaN, so
867 // make them all signalling.
868 SNaN = false;
869 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
870 sign = true;
871 fill_storage = APInt::getZero(numBits: semantics->precision - 1);
872 } else {
873 fill_storage = APInt::getAllOnes(numBits: semantics->precision - 1);
874 }
875 fill = &fill_storage;
876 }
877
878 // Set the significand bits to the fill.
879 if (!fill || fill->getNumWords() < numParts)
880 APInt::tcSet(significand, 0, numParts);
881 if (fill) {
882 APInt::tcAssign(significand, fill->getRawData(),
883 std::min(a: fill->getNumWords(), b: numParts));
884
885 // Zero out the excess bits of the significand.
886 unsigned bitsToPreserve = semantics->precision - 1;
887 unsigned part = bitsToPreserve / 64;
888 bitsToPreserve %= 64;
889 significand[part] &= ((1ULL << bitsToPreserve) - 1);
890 for (part++; part != numParts; ++part)
891 significand[part] = 0;
892 }
893
894 unsigned QNaNBit =
895 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
896
897 if (SNaN) {
898 // We always have to clear the QNaN bit to make it an SNaN.
899 APInt::tcClearBit(significand, bit: QNaNBit);
900
901 // If there are no bits set in the payload, we have to set
902 // *something* to make it a NaN instead of an infinity;
903 // conventionally, this is the next bit down from the QNaN bit.
904 if (APInt::tcIsZero(significand, numParts))
905 APInt::tcSetBit(significand, bit: QNaNBit - 1);
906 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
907 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
908 // Do nothing.
909 } else {
910 // We always have to set the QNaN bit to make it a QNaN.
911 APInt::tcSetBit(significand, bit: QNaNBit);
912 }
913
914 // For x87 extended precision, we want to make a NaN, not a
915 // pseudo-NaN. Maybe we should expose the ability to make
916 // pseudo-NaNs?
917 if (semantics == &APFloatBase::semX87DoubleExtended)
918 APInt::tcSetBit(significand, bit: QNaNBit + 1);
919}
920
921IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
922 if (this != &rhs) {
923 if (semantics != rhs.semantics) {
924 freeSignificand();
925 initialize(ourSemantics: rhs.semantics);
926 }
927 assign(rhs);
928 }
929
930 return *this;
931}
932
933IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
934 freeSignificand();
935
936 semantics = rhs.semantics;
937 significand = rhs.significand;
938 exponent = rhs.exponent;
939 category = rhs.category;
940 sign = rhs.sign;
941
942 rhs.semantics = &APFloatBase::semBogus;
943 return *this;
944}
945
946bool IEEEFloat::isDenormal() const {
947 return getSemantics().hasDenormals && isFiniteNonZero() &&
948 (exponent == semantics->minExponent) &&
949 (APInt::tcExtractBit(significandParts(), bit: semantics->precision - 1) ==
950 0);
951}
952
953bool IEEEFloat::isSmallest() const {
954 // The smallest number by magnitude in our format will be the smallest
955 // denormal, i.e. the floating point number with exponent being minimum
956 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
957 return isFiniteNonZero() && exponent == semantics->minExponent &&
958 significandMSB() == 0;
959}
960
961bool IEEEFloat::isSmallestNormalized() const {
962 return getCategory() == fcNormal && exponent == semantics->minExponent &&
963 isSignificandAllZerosExceptMSB();
964}
965
966unsigned int IEEEFloat::getNumHighBits() const {
967 const unsigned int PartCount = partCountForBits(bits: semantics->precision);
968 const unsigned int Bits = PartCount * integerPartWidth;
969
970 // Compute how many bits are used in the final word.
971 // When precision is just 1, it represents the 'Pth'
972 // Precision bit and not the actual significand bit.
973 const unsigned int NumHighBits = (semantics->precision > 1)
974 ? (Bits - semantics->precision + 1)
975 : (Bits - semantics->precision);
976 return NumHighBits;
977}
978
979bool IEEEFloat::isSignificandAllOnes() const {
980 // Test if the significand excluding the integral bit is all ones. This allows
981 // us to test for binade boundaries.
982 const integerPart *Parts = significandParts();
983 const unsigned PartCount = partCountForBits(bits: semantics->precision);
984 for (unsigned i = 0; i < PartCount - 1; i++)
985 if (~Parts[i])
986 return false;
987
988 // Set the unused high bits to all ones when we compare.
989 const unsigned NumHighBits = getNumHighBits();
990 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
991 "Can not have more high bits to fill than integerPartWidth");
992 const integerPart HighBitFill =
993 ~integerPart(0) << (integerPartWidth - NumHighBits);
994 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
995 return false;
996
997 return true;
998}
999
1000bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1001 // Test if the significand excluding the integral bit is all ones except for
1002 // the least significant bit.
1003 const integerPart *Parts = significandParts();
1004
1005 if (Parts[0] & 1)
1006 return false;
1007
1008 const unsigned PartCount = partCountForBits(bits: semantics->precision);
1009 for (unsigned i = 0; i < PartCount - 1; i++) {
1010 if (~Parts[i] & ~unsigned{!i})
1011 return false;
1012 }
1013
1014 // Set the unused high bits to all ones when we compare.
1015 const unsigned NumHighBits = getNumHighBits();
1016 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1017 "Can not have more high bits to fill than integerPartWidth");
1018 const integerPart HighBitFill = ~integerPart(0)
1019 << (integerPartWidth - NumHighBits);
1020 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1021 return false;
1022
1023 return true;
1024}
1025
1026bool IEEEFloat::isSignificandAllZeros() const {
1027 // Test if the significand excluding the integral bit is all zeros. This
1028 // allows us to test for binade boundaries.
1029 const integerPart *Parts = significandParts();
1030 const unsigned PartCount = partCountForBits(bits: semantics->precision);
1031
1032 for (unsigned i = 0; i < PartCount - 1; i++)
1033 if (Parts[i])
1034 return false;
1035
1036 // Compute how many bits are used in the final word.
1037 const unsigned NumHighBits = getNumHighBits();
1038 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1039 "clear than integerPartWidth");
1040 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1041
1042 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1043 return false;
1044
1045 return true;
1046}
1047
1048bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1049 const integerPart *Parts = significandParts();
1050 const unsigned PartCount = partCountForBits(bits: semantics->precision);
1051
1052 for (unsigned i = 0; i < PartCount - 1; i++) {
1053 if (Parts[i])
1054 return false;
1055 }
1056
1057 const unsigned NumHighBits = getNumHighBits();
1058 const integerPart MSBMask = integerPart(1)
1059 << (integerPartWidth - NumHighBits);
1060 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1061}
1062
1063bool IEEEFloat::isLargest() const {
1064 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1065 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1066 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1067 // The largest number by magnitude in our format will be the floating point
1068 // number with maximum exponent and with significand that is all ones except
1069 // the LSB.
1070 return (IsMaxExp && APFloat::hasSignificand(Sem: *semantics))
1071 ? isSignificandAllOnesExceptLSB()
1072 : IsMaxExp;
1073 } else {
1074 // The largest number by magnitude in our format will be the floating point
1075 // number with maximum exponent and with significand that is all ones.
1076 return IsMaxExp && isSignificandAllOnes();
1077 }
1078}
1079
1080bool IEEEFloat::isInteger() const {
1081 // This could be made more efficient; I'm going for obviously correct.
1082 if (!isFinite()) return false;
1083 IEEEFloat truncated = *this;
1084 truncated.roundToIntegral(rmTowardZero);
1085 return compare(truncated) == cmpEqual;
1086}
1087
1088bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1089 if (this == &rhs)
1090 return true;
1091 if (semantics != rhs.semantics ||
1092 category != rhs.category ||
1093 sign != rhs.sign)
1094 return false;
1095 if (category==fcZero || category==fcInfinity)
1096 return true;
1097
1098 if (isFiniteNonZero() && exponent != rhs.exponent)
1099 return false;
1100
1101 return std::equal(first1: significandParts(), last1: significandParts() + partCount(),
1102 first2: rhs.significandParts());
1103}
1104
1105IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1106 initialize(ourSemantics: &ourSemantics);
1107 sign = 0;
1108 category = fcNormal;
1109 zeroSignificand();
1110 exponent = ourSemantics.precision - 1;
1111 significandParts()[0] = value;
1112 normalize(rmNearestTiesToEven, lfExactlyZero);
1113}
1114
1115IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1116 initialize(ourSemantics: &ourSemantics);
1117 // The Float8E8MOFNU format does not have a representation
1118 // for zero. So, use the closest representation instead.
1119 // Moreover, the all-zero encoding represents a valid
1120 // normal value (which is the smallestNormalized here).
1121 // Hence, we call makeSmallestNormalized (where category is
1122 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1123 ourSemantics.hasZero ? makeZero(Neg: false) : makeSmallestNormalized(Negative: false);
1124}
1125
1126// Delegate to the previous constructor, because later copy constructor may
1127// actually inspects category, which can't be garbage.
1128IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1129 : IEEEFloat(ourSemantics) {}
1130
1131IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1132 initialize(ourSemantics: rhs.semantics);
1133 assign(rhs);
1134}
1135
1136IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1137 *this = std::move(rhs);
1138}
1139
1140IEEEFloat::~IEEEFloat() { freeSignificand(); }
1141
1142unsigned int IEEEFloat::partCount() const {
1143 return partCountForBits(bits: semantics->precision + 1);
1144}
1145
1146const APFloat::integerPart *IEEEFloat::significandParts() const {
1147 return const_cast<IEEEFloat *>(this)->significandParts();
1148}
1149
1150APFloat::integerPart *IEEEFloat::significandParts() {
1151 if (partCount() > 1)
1152 return significand.parts;
1153 else
1154 return &significand.part;
1155}
1156
1157void IEEEFloat::zeroSignificand() {
1158 APInt::tcSet(significandParts(), 0, partCount());
1159}
1160
1161/* Increment an fcNormal floating point number's significand. */
1162void IEEEFloat::incrementSignificand() {
1163 [[maybe_unused]] integerPart carry =
1164 APInt::tcIncrement(dst: significandParts(), parts: partCount());
1165
1166 /* Our callers should never cause us to overflow. */
1167 assert(carry == 0);
1168}
1169
1170/* Add the significand of the RHS. Returns the carry flag. */
1171APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1172 integerPart *parts = significandParts();
1173
1174 assert(semantics == rhs.semantics);
1175 assert(exponent == rhs.exponent);
1176
1177 return APInt::tcAdd(parts, rhs.significandParts(), carry: 0, partCount());
1178}
1179
1180/* Subtract the significand of the RHS with a borrow flag. Returns
1181 the borrow flag. */
1182APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1183 integerPart borrow) {
1184 integerPart *parts = significandParts();
1185
1186 assert(semantics == rhs.semantics);
1187 assert(exponent == rhs.exponent);
1188
1189 return APInt::tcSubtract(parts, rhs.significandParts(), carry: borrow,
1190 partCount());
1191}
1192
1193/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1194 on to the full-precision result of the multiplication. Returns the
1195 lost fraction. */
1196lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1197 IEEEFloat addend,
1198 bool ignoreAddend) {
1199 integerPart scratch[4];
1200 bool ignored;
1201
1202 assert(semantics == rhs.semantics);
1203
1204 unsigned precision = semantics->precision;
1205
1206 // Allocate space for twice as many bits as the original significand, plus one
1207 // extra bit for the addition to overflow into.
1208 unsigned newPartsCount = partCountForBits(bits: precision * 2 + 1);
1209
1210 // FIXME: Replace with SmallVector<4>.
1211 integerPart *fullSignificand =
1212 newPartsCount > 4 ? new integerPart[newPartsCount] : scratch;
1213
1214 integerPart *lhsSignificand = significandParts();
1215 unsigned partsCount = partCount();
1216
1217 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1218 rhs.significandParts(), partsCount, partsCount);
1219
1220 lostFraction lost_fraction = lfExactlyZero;
1221 // One, not zero, based MSB.
1222 unsigned omsb = APInt::tcMSB(parts: fullSignificand, n: newPartsCount) + 1;
1223 exponent += rhs.exponent;
1224
1225 // Assume the operands involved in the multiplication are single-precision
1226 // FP, and the two multiplicants are:
1227 // *this = a23 . a22 ... a0 * 2^e1
1228 // rhs = b23 . b22 ... b0 * 2^e2
1229 // the result of multiplication is:
1230 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1231 // Note that there are three significant bits at the left-hand side of the
1232 // radix point: two for the multiplication, and an overflow bit for the
1233 // addition (that will always be zero at this point). Move the radix point
1234 // toward left by two bits, and adjust exponent accordingly.
1235 exponent += 2;
1236
1237 if (!ignoreAddend && addend.isNonZero()) {
1238 // The intermediate result of the multiplication has "2 * precision"
1239 // signicant bit; adjust the addend to be consistent with mul result.
1240 //
1241 Significand savedSignificand = significand;
1242 const fltSemantics *savedSemantics = semantics;
1243
1244 // Normalize our MSB to one below the top bit to allow for overflow.
1245 unsigned extendedPrecision = 2 * precision + 1;
1246 if (omsb != extendedPrecision - 1) {
1247 assert(extendedPrecision > omsb);
1248 APInt::tcShiftLeft(fullSignificand, Words: newPartsCount,
1249 Count: (extendedPrecision - 1) - omsb);
1250 exponent -= (extendedPrecision - 1) - omsb;
1251 }
1252
1253 /* Create new semantics. */
1254 fltSemantics extendedSemantics = *semantics;
1255 extendedSemantics.precision = extendedPrecision;
1256
1257 if (newPartsCount == 1)
1258 significand.part = fullSignificand[0];
1259 else
1260 significand.parts = fullSignificand;
1261 semantics = &extendedSemantics;
1262
1263 // Make a copy so we can convert it to the extended semantics.
1264 // Note that we cannot convert the addend directly, as the extendedSemantics
1265 // is a local variable (which we take a reference to).
1266 IEEEFloat extendedAddend(addend);
1267 [[maybe_unused]] opStatus status = extendedAddend.convert(
1268 extendedSemantics, APFloat::rmTowardZero, &ignored);
1269 assert(status == APFloat::opOK);
1270
1271 // Shift the significand of the addend right by one bit. This guarantees
1272 // that the high bit of the significand is zero (same as fullSignificand),
1273 // so the addition will overflow (if it does overflow at all) into the top bit.
1274 lost_fraction = extendedAddend.shiftSignificandRight(1);
1275 assert(lost_fraction == lfExactlyZero &&
1276 "Lost precision while shifting addend for fused-multiply-add.");
1277
1278 lost_fraction = addOrSubtractSignificand(extendedAddend, subtract: false);
1279
1280 /* Restore our state. */
1281 if (newPartsCount == 1)
1282 fullSignificand[0] = significand.part;
1283 significand = savedSignificand;
1284 semantics = savedSemantics;
1285
1286 omsb = APInt::tcMSB(parts: fullSignificand, n: newPartsCount) + 1;
1287 }
1288
1289 // Convert the result having "2 * precision" significant-bits back to the one
1290 // having "precision" significant-bits. First, move the radix point from
1291 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1292 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1293 exponent -= precision + 1;
1294
1295 // In case MSB resides at the left-hand side of radix point, shift the
1296 // mantissa right by some amount to make sure the MSB reside right before
1297 // the radix point (i.e. "MSB . rest-significant-bits").
1298 //
1299 // Note that the result is not normalized when "omsb < precision". So, the
1300 // caller needs to call IEEEFloat::normalize() if normalized value is
1301 // expected.
1302 if (omsb > precision) {
1303 unsigned int bits, significantParts;
1304 lostFraction lf;
1305
1306 bits = omsb - precision;
1307 significantParts = partCountForBits(bits: omsb);
1308 lf = shiftRight(dst: fullSignificand, parts: significantParts, bits);
1309 lost_fraction = combineLostFractions(moreSignificant: lf, lessSignificant: lost_fraction);
1310 exponent += bits;
1311 }
1312
1313 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1314
1315 if (newPartsCount > 4)
1316 delete [] fullSignificand;
1317
1318 return lost_fraction;
1319}
1320
1321lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1322 // When the given semantics has zero, the addend here is a zero.
1323 // i.e . it belongs to the 'fcZero' category.
1324 // But when the semantics does not support zero, we need to
1325 // explicitly convey that this addend should be ignored
1326 // for multiplication.
1327 return multiplySignificand(rhs, addend: IEEEFloat(*semantics), ignoreAddend: !semantics->hasZero);
1328}
1329
1330/* Multiply the significands of LHS and RHS to DST. */
1331lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1332 integerPart scratch[4];
1333
1334 assert(semantics == rhs.semantics);
1335
1336 integerPart *lhsSignificand = significandParts();
1337 const integerPart *rhsSignificand = rhs.significandParts();
1338 unsigned partsCount = partCount();
1339
1340 integerPart *dividend =
1341 partsCount > 2 ? new integerPart[partsCount * 2] : scratch;
1342 integerPart *divisor = dividend + partsCount;
1343
1344 /* Copy the dividend and divisor as they will be modified in-place. */
1345 for (unsigned i = 0; i < partsCount; i++) {
1346 dividend[i] = lhsSignificand[i];
1347 divisor[i] = rhsSignificand[i];
1348 lhsSignificand[i] = 0;
1349 }
1350
1351 exponent -= rhs.exponent;
1352
1353 unsigned int precision = semantics->precision;
1354
1355 /* Normalize the divisor. */
1356 unsigned bit = precision - APInt::tcMSB(parts: divisor, n: partsCount) - 1;
1357 if (bit) {
1358 exponent += bit;
1359 APInt::tcShiftLeft(divisor, Words: partsCount, Count: bit);
1360 }
1361
1362 /* Normalize the dividend. */
1363 bit = precision - APInt::tcMSB(parts: dividend, n: partsCount) - 1;
1364 if (bit) {
1365 exponent -= bit;
1366 APInt::tcShiftLeft(dividend, Words: partsCount, Count: bit);
1367 }
1368
1369 /* Ensure the dividend >= divisor initially for the loop below.
1370 Incidentally, this means that the division loop below is
1371 guaranteed to set the integer bit to one. */
1372 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1373 exponent--;
1374 APInt::tcShiftLeft(dividend, Words: partsCount, Count: 1);
1375 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1376 }
1377
1378 /* Long division. */
1379 for (bit = precision; bit; bit -= 1) {
1380 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1381 APInt::tcSubtract(dividend, divisor, carry: 0, partsCount);
1382 APInt::tcSetBit(lhsSignificand, bit: bit - 1);
1383 }
1384
1385 APInt::tcShiftLeft(dividend, Words: partsCount, Count: 1);
1386 }
1387
1388 /* Figure out the lost fraction. */
1389 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1390
1391 lostFraction lost_fraction;
1392 if (cmp > 0)
1393 lost_fraction = lfMoreThanHalf;
1394 else if (cmp == 0)
1395 lost_fraction = lfExactlyHalf;
1396 else if (APInt::tcIsZero(dividend, partsCount))
1397 lost_fraction = lfExactlyZero;
1398 else
1399 lost_fraction = lfLessThanHalf;
1400
1401 if (partsCount > 2)
1402 delete [] dividend;
1403
1404 return lost_fraction;
1405}
1406
1407unsigned int IEEEFloat::significandMSB() const {
1408 return APInt::tcMSB(parts: significandParts(), n: partCount());
1409}
1410
1411unsigned int IEEEFloat::significandLSB() const {
1412 return APInt::tcLSB(significandParts(), n: partCount());
1413}
1414
1415/* Note that a zero result is NOT normalized to fcZero. */
1416lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1417 /* Our exponent should not overflow. */
1418 assert((ExponentType) (exponent + bits) >= exponent);
1419
1420 exponent += bits;
1421
1422 return shiftRight(dst: significandParts(), parts: partCount(), bits);
1423}
1424
1425/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1426void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1427 assert(bits < semantics->precision ||
1428 (semantics->precision == 1 && bits <= 1));
1429
1430 if (bits) {
1431 unsigned int partsCount = partCount();
1432
1433 APInt::tcShiftLeft(significandParts(), Words: partsCount, Count: bits);
1434 exponent -= bits;
1435
1436 assert(!APInt::tcIsZero(significandParts(), partsCount));
1437 }
1438}
1439
1440APFloat::cmpResult IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1441 assert(semantics == rhs.semantics);
1442 assert(isFiniteNonZero());
1443 assert(rhs.isFiniteNonZero());
1444
1445 int compare = exponent - rhs.exponent;
1446
1447 /* If exponents are equal, do an unsigned bignum comparison of the
1448 significands. */
1449 if (compare == 0)
1450 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1451 partCount());
1452
1453 if (compare > 0)
1454 return cmpGreaterThan;
1455 else if (compare < 0)
1456 return cmpLessThan;
1457 else
1458 return cmpEqual;
1459}
1460
1461/* Set the least significant BITS bits of a bignum, clear the
1462 rest. */
1463static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1464 unsigned bits) {
1465 unsigned i = 0;
1466 while (bits > APInt::APINT_BITS_PER_WORD) {
1467 dst[i++] = ~(APInt::WordType)0;
1468 bits -= APInt::APINT_BITS_PER_WORD;
1469 }
1470
1471 if (bits)
1472 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1473
1474 while (i < parts)
1475 dst[i++] = 0;
1476}
1477
1478/* Handle overflow. Sign is preserved. We either become infinity or
1479 the largest finite number. */
1480APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1481 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1482 /* Infinity? */
1483 if (rounding_mode == rmNearestTiesToEven ||
1484 rounding_mode == rmNearestTiesToAway ||
1485 (rounding_mode == rmTowardPositive && !sign) ||
1486 (rounding_mode == rmTowardNegative && sign)) {
1487 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1488 makeNaN(SNaN: false, Negative: sign);
1489 else
1490 category = fcInfinity;
1491 return static_cast<opStatus>(opOverflow | opInexact);
1492 }
1493 }
1494
1495 /* Otherwise we become the largest finite number. */
1496 category = fcNormal;
1497 exponent = semantics->maxExponent;
1498 tcSetLeastSignificantBits(dst: significandParts(), parts: partCount(),
1499 bits: semantics->precision);
1500 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1501 semantics->nanEncoding == fltNanEncoding::AllOnes)
1502 APInt::tcClearBit(significandParts(), bit: 0);
1503
1504 return opInexact;
1505}
1506
1507/* Returns TRUE if, when truncating the current number, with BIT the
1508 new LSB, with the given lost fraction and rounding mode, the result
1509 would need to be rounded away from zero (i.e., by increasing the
1510 signficand). This routine must work for fcZero of both signs, and
1511 fcNormal numbers. */
1512bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1513 lostFraction lost_fraction,
1514 unsigned int bit) const {
1515 /* NaNs and infinities should not have lost fractions. */
1516 assert(isFiniteNonZero() || category == fcZero);
1517
1518 /* Current callers never pass this so we don't handle it. */
1519 assert(lost_fraction != lfExactlyZero);
1520
1521 switch (rounding_mode) {
1522 case rmNearestTiesToAway:
1523 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1524
1525 case rmNearestTiesToEven:
1526 if (lost_fraction == lfMoreThanHalf)
1527 return true;
1528
1529 /* Our zeroes don't have a significand to test. */
1530 if (lost_fraction == lfExactlyHalf && category != fcZero)
1531 return APInt::tcExtractBit(significandParts(), bit);
1532
1533 return false;
1534
1535 case rmTowardZero:
1536 return false;
1537
1538 case rmTowardPositive:
1539 return !sign;
1540
1541 case rmTowardNegative:
1542 return sign;
1543
1544 default:
1545 break;
1546 }
1547 llvm_unreachable("Invalid rounding mode found");
1548}
1549
1550APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1551 lostFraction lost_fraction) {
1552 if (!isFiniteNonZero())
1553 return opOK;
1554
1555 /* Before rounding normalize the exponent of fcNormal numbers. */
1556 /* One, not zero, based MSB. */
1557 unsigned omsb = significandMSB() + 1;
1558
1559 // Only skip this `if` if the value is exactly zero.
1560 if (omsb || lost_fraction != lfExactlyZero) {
1561 /* OMSB is numbered from 1. We want to place it in the integer
1562 bit numbered PRECISION if possible, with a compensating change in
1563 the exponent. */
1564 int exponentChange = omsb - semantics->precision;
1565
1566 /* If the resulting exponent is too high, overflow according to
1567 the rounding mode. */
1568 if (exponent + exponentChange > semantics->maxExponent)
1569 return handleOverflow(rounding_mode);
1570
1571 /* Subnormal numbers have exponent minExponent, and their MSB
1572 is forced based on that. */
1573 if (exponent + exponentChange < semantics->minExponent)
1574 exponentChange = semantics->minExponent - exponent;
1575
1576 /* Shifting left is easy as we don't lose precision. */
1577 if (exponentChange < 0) {
1578 assert(lost_fraction == lfExactlyZero);
1579
1580 shiftSignificandLeft(bits: -exponentChange);
1581
1582 return opOK;
1583 }
1584
1585 if (exponentChange > 0) {
1586 lostFraction lf;
1587
1588 /* Shift right and capture any new lost fraction. */
1589 lf = shiftSignificandRight(bits: exponentChange);
1590
1591 lost_fraction = combineLostFractions(moreSignificant: lf, lessSignificant: lost_fraction);
1592
1593 /* Keep OMSB up-to-date. */
1594 if (omsb > (unsigned) exponentChange)
1595 omsb -= exponentChange;
1596 else
1597 omsb = 0;
1598 }
1599 }
1600
1601 // The all-ones values is an overflow if NaN is all ones. If NaN is
1602 // represented by negative zero, then it is a valid finite value.
1603 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1604 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1605 exponent == semantics->maxExponent && isSignificandAllOnes())
1606 return handleOverflow(rounding_mode);
1607
1608 /* Now round the number according to rounding_mode given the lost
1609 fraction. */
1610
1611 /* As specified in IEEE 754, since we do not trap we do not report
1612 underflow for exact results. */
1613 if (lost_fraction == lfExactlyZero) {
1614 /* Canonicalize zeroes. */
1615 if (omsb == 0) {
1616 category = fcZero;
1617 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1618 sign = false;
1619 if (!semantics->hasZero)
1620 makeSmallestNormalized(Negative: false);
1621 }
1622
1623 return opOK;
1624 }
1625
1626 /* Increment the significand if we're rounding away from zero. */
1627 if (roundAwayFromZero(rounding_mode, lost_fraction, bit: 0)) {
1628 if (omsb == 0)
1629 exponent = semantics->minExponent;
1630
1631 incrementSignificand();
1632 omsb = significandMSB() + 1;
1633
1634 /* Did the significand increment overflow? */
1635 if (omsb == (unsigned) semantics->precision + 1) {
1636 /* Renormalize by incrementing the exponent and shifting our
1637 significand right one. However if we already have the
1638 maximum exponent we overflow to infinity. */
1639 if (exponent == semantics->maxExponent)
1640 // Invoke overflow handling with a rounding mode that will guarantee
1641 // that the result gets turned into the correct infinity representation.
1642 // This is needed instead of just setting the category to infinity to
1643 // account for 8-bit floating point types that have no inf, only NaN.
1644 return handleOverflow(rounding_mode: sign ? rmTowardNegative : rmTowardPositive);
1645
1646 shiftSignificandRight(bits: 1);
1647
1648 return opInexact;
1649 }
1650
1651 // The all-ones values is an overflow if NaN is all ones. If NaN is
1652 // represented by negative zero, then it is a valid finite value.
1653 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1654 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1655 exponent == semantics->maxExponent && isSignificandAllOnes())
1656 return handleOverflow(rounding_mode);
1657 }
1658
1659 /* The normal case - we were and are not denormal, and any
1660 significand increment above didn't overflow. */
1661 if (omsb == semantics->precision)
1662 return opInexact;
1663
1664 /* We have a non-zero denormal. */
1665 assert(omsb < semantics->precision);
1666
1667 /* Canonicalize zeroes. */
1668 if (omsb == 0) {
1669 category = fcZero;
1670 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1671 sign = false;
1672 // This condition handles the case where the semantics
1673 // does not have zero but uses the all-zero encoding
1674 // to represent the smallest normal value.
1675 if (!semantics->hasZero)
1676 makeSmallestNormalized(Negative: false);
1677 }
1678
1679 /* The fcZero case is a denormal that underflowed to zero. */
1680 return (opStatus) (opUnderflow | opInexact);
1681}
1682
1683APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1684 bool subtract) {
1685 switch (PackCategoriesIntoKey(category, rhs.category)) {
1686 default:
1687 llvm_unreachable(nullptr);
1688
1689 case PackCategoriesIntoKey(fcZero, fcNaN):
1690 case PackCategoriesIntoKey(fcNormal, fcNaN):
1691 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1692 assign(rhs);
1693 [[fallthrough]];
1694 case PackCategoriesIntoKey(fcNaN, fcZero):
1695 case PackCategoriesIntoKey(fcNaN, fcNormal):
1696 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1697 case PackCategoriesIntoKey(fcNaN, fcNaN):
1698 if (isSignaling()) {
1699 makeQuiet();
1700 return opInvalidOp;
1701 }
1702 return rhs.isSignaling() ? opInvalidOp : opOK;
1703
1704 case PackCategoriesIntoKey(fcNormal, fcZero):
1705 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1706 case PackCategoriesIntoKey(fcInfinity, fcZero):
1707 return opOK;
1708
1709 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1710 case PackCategoriesIntoKey(fcZero, fcInfinity):
1711 category = fcInfinity;
1712 sign = rhs.sign ^ subtract;
1713 return opOK;
1714
1715 case PackCategoriesIntoKey(fcZero, fcNormal):
1716 assign(rhs);
1717 sign = rhs.sign ^ subtract;
1718 return opOK;
1719
1720 case PackCategoriesIntoKey(fcZero, fcZero):
1721 /* Sign depends on rounding mode; handled by caller. */
1722 return opOK;
1723
1724 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1725 /* Differently signed infinities can only be validly
1726 subtracted. */
1727 if (((sign ^ rhs.sign)!=0) != subtract) {
1728 makeNaN();
1729 return opInvalidOp;
1730 }
1731
1732 return opOK;
1733
1734 case PackCategoriesIntoKey(fcNormal, fcNormal):
1735 return opDivByZero;
1736 }
1737}
1738
1739/* Add or subtract two normal numbers. */
1740lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1741 bool subtract) {
1742 [[maybe_unused]] integerPart carry = 0;
1743 lostFraction lost_fraction;
1744
1745 /* Determine if the operation on the absolute values is effectively
1746 an addition or subtraction. */
1747 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1748
1749 /* Are we bigger exponent-wise than the RHS? */
1750 int bits = exponent - rhs.exponent;
1751
1752 /* Subtraction is more subtle than one might naively expect. */
1753 if (subtract) {
1754 if ((bits < 0) && !semantics->hasSignedRepr)
1755 llvm_unreachable(
1756 "This floating point format does not support signed values");
1757
1758 IEEEFloat temp_rhs(rhs);
1759 bool lost_fraction_is_from_rhs = false;
1760
1761 if (bits == 0)
1762 lost_fraction = lfExactlyZero;
1763 else if (bits > 0) {
1764 lost_fraction = temp_rhs.shiftSignificandRight(bits: bits - 1);
1765 lost_fraction_is_from_rhs = true;
1766 shiftSignificandLeft(bits: 1);
1767 } else {
1768 lost_fraction = shiftSignificandRight(bits: -bits - 1);
1769 temp_rhs.shiftSignificandLeft(bits: 1);
1770 }
1771
1772 // Should we reverse the subtraction.
1773 cmpResult cmp_result = compareAbsoluteValue(rhs: temp_rhs);
1774 if (cmp_result == cmpLessThan) {
1775 bool borrow =
1776 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1777 if (borrow) {
1778 // The lost fraction is being subtracted, borrow from the significand
1779 // and invert `lost_fraction`.
1780 if (lost_fraction == lfLessThanHalf)
1781 lost_fraction = lfMoreThanHalf;
1782 else if (lost_fraction == lfMoreThanHalf)
1783 lost_fraction = lfLessThanHalf;
1784 }
1785 carry = temp_rhs.subtractSignificand(rhs: *this, borrow);
1786 copySignificand(rhs: temp_rhs);
1787 sign = !sign;
1788 } else if (cmp_result == cmpGreaterThan) {
1789 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1790 if (borrow) {
1791 // The lost fraction is being subtracted, borrow from the significand
1792 // and invert `lost_fraction`.
1793 if (lost_fraction == lfLessThanHalf)
1794 lost_fraction = lfMoreThanHalf;
1795 else if (lost_fraction == lfMoreThanHalf)
1796 lost_fraction = lfLessThanHalf;
1797 }
1798 carry = subtractSignificand(rhs: temp_rhs, borrow);
1799 } else { // cmpEqual
1800 zeroSignificand();
1801 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1802 // rhs is slightly larger due to the lost fraction, flip the sign.
1803 sign = !sign;
1804 }
1805 }
1806
1807 /* The code above is intended to ensure that no borrow is
1808 necessary. */
1809 assert(!carry);
1810 } else {
1811 if (bits > 0) {
1812 IEEEFloat temp_rhs(rhs);
1813
1814 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1815 carry = addSignificand(rhs: temp_rhs);
1816 } else {
1817 lost_fraction = shiftSignificandRight(bits: -bits);
1818 carry = addSignificand(rhs);
1819 }
1820
1821 /* We have a guard bit; generating a carry cannot happen. */
1822 assert(!carry);
1823 }
1824
1825 return lost_fraction;
1826}
1827
1828APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1829 switch (PackCategoriesIntoKey(category, rhs.category)) {
1830 default:
1831 llvm_unreachable(nullptr);
1832
1833 case PackCategoriesIntoKey(fcZero, fcNaN):
1834 case PackCategoriesIntoKey(fcNormal, fcNaN):
1835 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1836 assign(rhs);
1837 sign = false;
1838 [[fallthrough]];
1839 case PackCategoriesIntoKey(fcNaN, fcZero):
1840 case PackCategoriesIntoKey(fcNaN, fcNormal):
1841 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1842 case PackCategoriesIntoKey(fcNaN, fcNaN):
1843 sign ^= rhs.sign; // restore the original sign
1844 if (isSignaling()) {
1845 makeQuiet();
1846 return opInvalidOp;
1847 }
1848 return rhs.isSignaling() ? opInvalidOp : opOK;
1849
1850 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1851 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1852 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1853 category = fcInfinity;
1854 return opOK;
1855
1856 case PackCategoriesIntoKey(fcZero, fcNormal):
1857 case PackCategoriesIntoKey(fcNormal, fcZero):
1858 case PackCategoriesIntoKey(fcZero, fcZero):
1859 category = fcZero;
1860 return opOK;
1861
1862 case PackCategoriesIntoKey(fcZero, fcInfinity):
1863 case PackCategoriesIntoKey(fcInfinity, fcZero):
1864 makeNaN();
1865 return opInvalidOp;
1866
1867 case PackCategoriesIntoKey(fcNormal, fcNormal):
1868 return opOK;
1869 }
1870}
1871
1872APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1873 switch (PackCategoriesIntoKey(category, rhs.category)) {
1874 default:
1875 llvm_unreachable(nullptr);
1876
1877 case PackCategoriesIntoKey(fcZero, fcNaN):
1878 case PackCategoriesIntoKey(fcNormal, fcNaN):
1879 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1880 assign(rhs);
1881 sign = false;
1882 [[fallthrough]];
1883 case PackCategoriesIntoKey(fcNaN, fcZero):
1884 case PackCategoriesIntoKey(fcNaN, fcNormal):
1885 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1886 case PackCategoriesIntoKey(fcNaN, fcNaN):
1887 sign ^= rhs.sign; // restore the original sign
1888 if (isSignaling()) {
1889 makeQuiet();
1890 return opInvalidOp;
1891 }
1892 return rhs.isSignaling() ? opInvalidOp : opOK;
1893
1894 case PackCategoriesIntoKey(fcInfinity, fcZero):
1895 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1896 case PackCategoriesIntoKey(fcZero, fcInfinity):
1897 case PackCategoriesIntoKey(fcZero, fcNormal):
1898 return opOK;
1899
1900 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1901 category = fcZero;
1902 return opOK;
1903
1904 case PackCategoriesIntoKey(fcNormal, fcZero):
1905 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1906 makeNaN(SNaN: false, Negative: sign);
1907 else
1908 category = fcInfinity;
1909 return opDivByZero;
1910
1911 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1912 case PackCategoriesIntoKey(fcZero, fcZero):
1913 makeNaN();
1914 return opInvalidOp;
1915
1916 case PackCategoriesIntoKey(fcNormal, fcNormal):
1917 return opOK;
1918 }
1919}
1920
1921APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1922 switch (PackCategoriesIntoKey(category, rhs.category)) {
1923 default:
1924 llvm_unreachable(nullptr);
1925
1926 case PackCategoriesIntoKey(fcZero, fcNaN):
1927 case PackCategoriesIntoKey(fcNormal, fcNaN):
1928 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1929 assign(rhs);
1930 [[fallthrough]];
1931 case PackCategoriesIntoKey(fcNaN, fcZero):
1932 case PackCategoriesIntoKey(fcNaN, fcNormal):
1933 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1934 case PackCategoriesIntoKey(fcNaN, fcNaN):
1935 if (isSignaling()) {
1936 makeQuiet();
1937 return opInvalidOp;
1938 }
1939 return rhs.isSignaling() ? opInvalidOp : opOK;
1940
1941 case PackCategoriesIntoKey(fcZero, fcInfinity):
1942 case PackCategoriesIntoKey(fcZero, fcNormal):
1943 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1944 return opOK;
1945
1946 case PackCategoriesIntoKey(fcNormal, fcZero):
1947 case PackCategoriesIntoKey(fcInfinity, fcZero):
1948 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1949 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1950 case PackCategoriesIntoKey(fcZero, fcZero):
1951 makeNaN();
1952 return opInvalidOp;
1953
1954 case PackCategoriesIntoKey(fcNormal, fcNormal):
1955 return opOK;
1956 }
1957}
1958
1959APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1960 switch (PackCategoriesIntoKey(category, rhs.category)) {
1961 default:
1962 llvm_unreachable(nullptr);
1963
1964 case PackCategoriesIntoKey(fcZero, fcNaN):
1965 case PackCategoriesIntoKey(fcNormal, fcNaN):
1966 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1967 assign(rhs);
1968 [[fallthrough]];
1969 case PackCategoriesIntoKey(fcNaN, fcZero):
1970 case PackCategoriesIntoKey(fcNaN, fcNormal):
1971 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1972 case PackCategoriesIntoKey(fcNaN, fcNaN):
1973 if (isSignaling()) {
1974 makeQuiet();
1975 return opInvalidOp;
1976 }
1977 return rhs.isSignaling() ? opInvalidOp : opOK;
1978
1979 case PackCategoriesIntoKey(fcZero, fcInfinity):
1980 case PackCategoriesIntoKey(fcZero, fcNormal):
1981 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1982 return opOK;
1983
1984 case PackCategoriesIntoKey(fcNormal, fcZero):
1985 case PackCategoriesIntoKey(fcInfinity, fcZero):
1986 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1987 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1988 case PackCategoriesIntoKey(fcZero, fcZero):
1989 makeNaN();
1990 return opInvalidOp;
1991
1992 case PackCategoriesIntoKey(fcNormal, fcNormal):
1993 return opDivByZero; // fake status, indicating this is not a special case
1994 }
1995}
1996
1997/* Change sign. */
1998void IEEEFloat::changeSign() {
1999 // With NaN-as-negative-zero, neither NaN or negative zero can change
2000 // their signs.
2001 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2002 (isZero() || isNaN()))
2003 return;
2004 /* Look mummy, this one's easy. */
2005 sign = !sign;
2006}
2007
2008/* Normalized addition or subtraction. */
2009APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2010 roundingMode rounding_mode,
2011 bool subtract) {
2012 opStatus fs = addOrSubtractSpecials(rhs, subtract);
2013
2014 /* This return code means it was not a simple case. */
2015 if (fs == opDivByZero) {
2016 lostFraction lost_fraction;
2017
2018 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2019 fs = normalize(rounding_mode, lost_fraction);
2020
2021 /* Can only be zero if we lost no fraction. */
2022 assert(category != fcZero || lost_fraction == lfExactlyZero);
2023 }
2024
2025 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2026 positive zero unless rounding to minus infinity, except that
2027 adding two like-signed zeroes gives that zero. */
2028 if (category == fcZero) {
2029 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2030 sign = (rounding_mode == rmTowardNegative);
2031 // NaN-in-negative-zero means zeros need to be normalized to +0.
2032 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2033 sign = false;
2034 }
2035
2036 return fs;
2037}
2038
2039/* Normalized addition. */
2040APFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2041 roundingMode rounding_mode) {
2042 return addOrSubtract(rhs, rounding_mode, subtract: false);
2043}
2044
2045/* Normalized subtraction. */
2046APFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2047 roundingMode rounding_mode) {
2048 return addOrSubtract(rhs, rounding_mode, subtract: true);
2049}
2050
2051/* Normalized multiply. */
2052APFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2053 roundingMode rounding_mode) {
2054 sign ^= rhs.sign;
2055 opStatus fs = multiplySpecials(rhs);
2056
2057 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2058 sign = false;
2059 if (isFiniteNonZero()) {
2060 lostFraction lost_fraction = multiplySignificand(rhs);
2061 fs = normalize(rounding_mode, lost_fraction);
2062 if (lost_fraction != lfExactlyZero)
2063 fs = (opStatus) (fs | opInexact);
2064 }
2065
2066 return fs;
2067}
2068
2069/* Normalized divide. */
2070APFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2071 roundingMode rounding_mode) {
2072 sign ^= rhs.sign;
2073 opStatus fs = divideSpecials(rhs);
2074
2075 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2076 sign = false;
2077 if (isFiniteNonZero()) {
2078 lostFraction lost_fraction = divideSignificand(rhs);
2079 fs = normalize(rounding_mode, lost_fraction);
2080 if (lost_fraction != lfExactlyZero)
2081 fs = (opStatus) (fs | opInexact);
2082 }
2083
2084 return fs;
2085}
2086
2087/* Normalized remainder. */
2088APFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2089 unsigned int origSign = sign;
2090
2091 // First handle the special cases.
2092 opStatus fs = remainderSpecials(rhs);
2093 if (fs != opDivByZero)
2094 return fs;
2095
2096 fs = opOK;
2097
2098 // Make sure the current value is less than twice the denom. If the addition
2099 // did not succeed (an overflow has happened), which means that the finite
2100 // value we currently posses must be less than twice the denom (as we are
2101 // using the same semantics).
2102 IEEEFloat P2 = rhs;
2103 if (P2.add(rhs, rounding_mode: rmNearestTiesToEven) == opOK) {
2104 fs = mod(P2);
2105 assert(fs == opOK);
2106 }
2107
2108 // Lets work with absolute numbers.
2109 IEEEFloat P = rhs;
2110 P.sign = false;
2111 sign = false;
2112
2113 //
2114 // To calculate the remainder we use the following scheme.
2115 //
2116 // The remainder is defained as follows:
2117 //
2118 // remainder = numer - rquot * denom = x - r * p
2119 //
2120 // Where r is the result of: x/p, rounded toward the nearest integral value
2121 // (with halfway cases rounded toward the even number).
2122 //
2123 // Currently, (after x mod 2p):
2124 // r is the number of 2p's present inside x, which is inherently, an even
2125 // number of p's.
2126 //
2127 // We may split the remaining calculation into 4 options:
2128 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2129 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2130 // are done as well.
2131 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2132 // to subtract 1p at least once.
2133 // - if x >= p then we must subtract p at least once, as x must be a
2134 // remainder.
2135 //
2136 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2137 //
2138 // We can now split the remaining calculation to the following 3 options:
2139 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2140 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2141 // must round up to the next even number. so we must subtract p once more.
2142 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2143 // integral, and subtract p once more.
2144 //
2145
2146 // Extend the semantics to prevent an overflow/underflow or inexact result.
2147 bool losesInfo;
2148 fltSemantics extendedSemantics = *semantics;
2149 extendedSemantics.maxExponent++;
2150 extendedSemantics.minExponent--;
2151 extendedSemantics.precision += 2;
2152
2153 IEEEFloat VEx = *this;
2154 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2155 assert(fs == opOK && !losesInfo);
2156 IEEEFloat PEx = P;
2157 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2158 assert(fs == opOK && !losesInfo);
2159
2160 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2161 // any fraction.
2162 fs = VEx.add(rhs: VEx, rounding_mode: rmNearestTiesToEven);
2163 assert(fs == opOK);
2164
2165 if (VEx.compare(PEx) == cmpGreaterThan) {
2166 fs = subtract(rhs: P, rounding_mode: rmNearestTiesToEven);
2167 assert(fs == opOK);
2168
2169 // Make VEx = this.add(this), but because we have different semantics, we do
2170 // not want to `convert` again, so we just subtract PEx twice (which equals
2171 // to the desired value).
2172 fs = VEx.subtract(rhs: PEx, rounding_mode: rmNearestTiesToEven);
2173 assert(fs == opOK);
2174 fs = VEx.subtract(rhs: PEx, rounding_mode: rmNearestTiesToEven);
2175 assert(fs == opOK);
2176
2177 cmpResult result = VEx.compare(PEx);
2178 if (result == cmpGreaterThan || result == cmpEqual) {
2179 fs = subtract(rhs: P, rounding_mode: rmNearestTiesToEven);
2180 assert(fs == opOK);
2181 }
2182 }
2183
2184 if (isZero()) {
2185 sign = origSign; // IEEE754 requires this
2186 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2187 // But some 8-bit floats only have positive 0.
2188 sign = false;
2189 } else {
2190 sign ^= origSign;
2191 }
2192 return fs;
2193}
2194
2195/* Normalized llvm frem (C fmod). */
2196APFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2197 opStatus fs = modSpecials(rhs);
2198 unsigned int origSign = sign;
2199
2200 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2201 compareAbsoluteValue(rhs) != cmpLessThan) {
2202 int Exp = ilogb(Arg: *this) - ilogb(Arg: rhs);
2203 IEEEFloat V = scalbn(X: rhs, Exp, rmNearestTiesToEven);
2204 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2205 // check for it.
2206 if (V.isNaN() || compareAbsoluteValue(rhs: V) == cmpLessThan)
2207 V = scalbn(X: rhs, Exp: Exp - 1, rmNearestTiesToEven);
2208 V.sign = sign;
2209
2210 fs = subtract(rhs: V, rounding_mode: rmNearestTiesToEven);
2211
2212 // When the semantics supports zero, this loop's
2213 // exit-condition is handled by the 'isFiniteNonZero'
2214 // category check above. However, when the semantics
2215 // does not have 'fcZero' and we have reached the
2216 // minimum possible value, (and any further subtract
2217 // will underflow to the same value) explicitly
2218 // provide an exit-path here.
2219 if (!semantics->hasZero && this->isSmallest())
2220 break;
2221
2222 assert(fs==opOK);
2223 }
2224 if (isZero()) {
2225 sign = origSign; // fmod requires this
2226 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2227 sign = false;
2228 }
2229 return fs;
2230}
2231
2232/* Normalized fused-multiply-add. */
2233APFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2234 const IEEEFloat &addend,
2235 roundingMode rounding_mode) {
2236 opStatus fs;
2237
2238 /* Post-multiplication sign, before addition. */
2239 sign ^= multiplicand.sign;
2240
2241 /* If and only if all arguments are normal do we need to do an
2242 extended-precision calculation. */
2243 if (isFiniteNonZero() &&
2244 multiplicand.isFiniteNonZero() &&
2245 addend.isFinite()) {
2246 lostFraction lost_fraction;
2247
2248 lost_fraction = multiplySignificand(rhs: multiplicand, addend);
2249 fs = normalize(rounding_mode, lost_fraction);
2250 if (lost_fraction != lfExactlyZero)
2251 fs = (opStatus) (fs | opInexact);
2252
2253 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2254 positive zero unless rounding to minus infinity, except that
2255 adding two like-signed zeroes gives that zero. */
2256 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2257 sign = (rounding_mode == rmTowardNegative);
2258 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2259 sign = false;
2260 }
2261 } else {
2262 fs = multiplySpecials(rhs: multiplicand);
2263
2264 /* FS can only be opOK or opInvalidOp. There is no more work
2265 to do in the latter case. The IEEE-754R standard says it is
2266 implementation-defined in this case whether, if ADDEND is a
2267 quiet NaN, we raise invalid op; this implementation does so.
2268
2269 If we need to do the addition we can do so with normal
2270 precision. */
2271 if (fs == opOK)
2272 fs = addOrSubtract(rhs: addend, rounding_mode, subtract: false);
2273 }
2274
2275 return fs;
2276}
2277
2278/* Rounding-mode correct round to integral value. */
2279APFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2280 if (isInfinity())
2281 // [IEEE Std 754-2008 6.1]:
2282 // The behavior of infinity in floating-point arithmetic is derived from the
2283 // limiting cases of real arithmetic with operands of arbitrarily
2284 // large magnitude, when such a limit exists.
2285 // ...
2286 // Operations on infinite operands are usually exact and therefore signal no
2287 // exceptions ...
2288 return opOK;
2289
2290 if (isNaN()) {
2291 if (isSignaling()) {
2292 // [IEEE Std 754-2008 6.2]:
2293 // Under default exception handling, any operation signaling an invalid
2294 // operation exception and for which a floating-point result is to be
2295 // delivered shall deliver a quiet NaN.
2296 makeQuiet();
2297 // [IEEE Std 754-2008 6.2]:
2298 // Signaling NaNs shall be reserved operands that, under default exception
2299 // handling, signal the invalid operation exception(see 7.2) for every
2300 // general-computational and signaling-computational operation except for
2301 // the conversions described in 5.12.
2302 return opInvalidOp;
2303 } else {
2304 // [IEEE Std 754-2008 6.2]:
2305 // For an operation with quiet NaN inputs, other than maximum and minimum
2306 // operations, if a floating-point result is to be delivered the result
2307 // shall be a quiet NaN which should be one of the input NaNs.
2308 // ...
2309 // Every general-computational and quiet-computational operation involving
2310 // one or more input NaNs, none of them signaling, shall signal no
2311 // exception, except fusedMultiplyAdd might signal the invalid operation
2312 // exception(see 7.2).
2313 return opOK;
2314 }
2315 }
2316
2317 if (isZero()) {
2318 // [IEEE Std 754-2008 6.3]:
2319 // ... the sign of the result of conversions, the quantize operation, the
2320 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2321 // the sign of the first or only operand.
2322 return opOK;
2323 }
2324
2325 // If the exponent is large enough, we know that this value is already
2326 // integral, and the arithmetic below would potentially cause it to saturate
2327 // to +/-Inf. Bail out early instead.
2328 if (exponent + 1 >= (int)APFloat::semanticsPrecision(semantics: *semantics))
2329 return opOK;
2330
2331 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2332 // precision of our format, and then subtract it back off again. The choice
2333 // of rounding modes for the addition/subtraction determines the rounding mode
2334 // for our integral rounding as well.
2335 // NOTE: When the input value is negative, we do subtraction followed by
2336 // addition instead.
2337 APInt IntegerConstant(NextPowerOf2(A: APFloat::semanticsPrecision(semantics: *semantics)),
2338 1);
2339 IntegerConstant <<= APFloat::semanticsPrecision(semantics: *semantics) - 1;
2340 IEEEFloat MagicConstant(*semantics);
2341 opStatus fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2342 rmNearestTiesToEven);
2343 assert(fs == opOK);
2344 MagicConstant.sign = sign;
2345
2346 // Preserve the input sign so that we can handle the case of zero result
2347 // correctly.
2348 bool inputSign = isNegative();
2349
2350 fs = add(rhs: MagicConstant, rounding_mode);
2351
2352 // Current value and 'MagicConstant' are both integers, so the result of the
2353 // subtraction is always exact according to Sterbenz' lemma.
2354 subtract(rhs: MagicConstant, rounding_mode);
2355
2356 // Restore the input sign.
2357 if (inputSign != isNegative())
2358 changeSign();
2359
2360 return fs;
2361}
2362
2363/* Comparison requires normalized numbers. */
2364APFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2365 assert(semantics == rhs.semantics);
2366
2367 switch (PackCategoriesIntoKey(category, rhs.category)) {
2368 default:
2369 llvm_unreachable(nullptr);
2370
2371 case PackCategoriesIntoKey(fcNaN, fcZero):
2372 case PackCategoriesIntoKey(fcNaN, fcNormal):
2373 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2374 case PackCategoriesIntoKey(fcNaN, fcNaN):
2375 case PackCategoriesIntoKey(fcZero, fcNaN):
2376 case PackCategoriesIntoKey(fcNormal, fcNaN):
2377 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2378 return cmpUnordered;
2379
2380 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2381 case PackCategoriesIntoKey(fcInfinity, fcZero):
2382 case PackCategoriesIntoKey(fcNormal, fcZero):
2383 if (sign)
2384 return cmpLessThan;
2385 else
2386 return cmpGreaterThan;
2387
2388 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2389 case PackCategoriesIntoKey(fcZero, fcInfinity):
2390 case PackCategoriesIntoKey(fcZero, fcNormal):
2391 if (rhs.sign)
2392 return cmpGreaterThan;
2393 else
2394 return cmpLessThan;
2395
2396 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2397 if (sign == rhs.sign)
2398 return cmpEqual;
2399 else if (sign)
2400 return cmpLessThan;
2401 else
2402 return cmpGreaterThan;
2403
2404 case PackCategoriesIntoKey(fcZero, fcZero):
2405 return cmpEqual;
2406
2407 case PackCategoriesIntoKey(fcNormal, fcNormal):
2408 break;
2409 }
2410
2411 cmpResult result;
2412 /* Two normal numbers. Do they have the same sign? */
2413 if (sign != rhs.sign) {
2414 if (sign)
2415 result = cmpLessThan;
2416 else
2417 result = cmpGreaterThan;
2418 } else {
2419 /* Compare absolute values; invert result if negative. */
2420 result = compareAbsoluteValue(rhs);
2421
2422 if (sign) {
2423 if (result == cmpLessThan)
2424 result = cmpGreaterThan;
2425 else if (result == cmpGreaterThan)
2426 result = cmpLessThan;
2427 }
2428 }
2429
2430 return result;
2431}
2432
2433/// IEEEFloat::convert - convert a value of one floating point type to another.
2434/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2435/// records whether the transformation lost information, i.e. whether
2436/// converting the result back to the original type will produce the
2437/// original value (this is almost the same as return value==fsOK, but there
2438/// are edge cases where this is not so).
2439
2440APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2441 roundingMode rounding_mode,
2442 bool *losesInfo) {
2443 opStatus fs;
2444 const fltSemantics &fromSemantics = *semantics;
2445 bool is_signaling = isSignaling();
2446
2447 lostFraction lostFraction = lfExactlyZero;
2448 unsigned newPartCount = partCountForBits(bits: toSemantics.precision + 1);
2449 unsigned oldPartCount = partCount();
2450 int shift = toSemantics.precision - fromSemantics.precision;
2451
2452 bool X86SpecialNan = false;
2453 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2454 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2455 (!(*significandParts() & 0x8000000000000000ULL) ||
2456 !(*significandParts() & 0x4000000000000000ULL))) {
2457 // x86 has some unusual NaNs which cannot be represented in any other
2458 // format; note them here.
2459 X86SpecialNan = true;
2460 }
2461
2462 // If this is a truncation of a denormal number, and the target semantics
2463 // has larger exponent range than the source semantics (this can happen
2464 // when truncating from PowerPC double-double to double format), the
2465 // right shift could lose result mantissa bits. Adjust exponent instead
2466 // of performing excessive shift.
2467 // Also do a similar trick in case shifting denormal would produce zero
2468 // significand as this case isn't handled correctly by normalize.
2469 if (shift < 0 && isFiniteNonZero()) {
2470 int omsb = significandMSB() + 1;
2471 int exponentChange = omsb - fromSemantics.precision;
2472 if (exponent + exponentChange < toSemantics.minExponent)
2473 exponentChange = toSemantics.minExponent - exponent;
2474 exponentChange = std::max(a: exponentChange, b: shift);
2475 if (exponentChange < 0) {
2476 shift -= exponentChange;
2477 exponent += exponentChange;
2478 } else if (omsb <= -shift) {
2479 exponentChange = omsb + shift - 1; // leave at least one bit set
2480 shift -= exponentChange;
2481 exponent += exponentChange;
2482 }
2483 }
2484
2485 // If this is a truncation, perform the shift before we narrow the storage.
2486 if (shift < 0 && (isFiniteNonZero() ||
2487 (category == fcNaN && semantics->nonFiniteBehavior !=
2488 fltNonfiniteBehavior::NanOnly)))
2489 lostFraction = shiftRight(dst: significandParts(), parts: oldPartCount, bits: -shift);
2490
2491 // Fix the storage so it can hold to new value.
2492 if (newPartCount > oldPartCount) {
2493 // The new type requires more storage; make it available.
2494 integerPart *newParts;
2495 newParts = new integerPart[newPartCount];
2496 APInt::tcSet(newParts, 0, newPartCount);
2497 if (isFiniteNonZero() || category==fcNaN)
2498 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2499 freeSignificand();
2500 significand.parts = newParts;
2501 } else if (newPartCount == 1 && oldPartCount != 1) {
2502 // Switch to built-in storage for a single part.
2503 integerPart newPart = 0;
2504 if (isFiniteNonZero() || category==fcNaN)
2505 newPart = significandParts()[0];
2506 freeSignificand();
2507 significand.part = newPart;
2508 }
2509
2510 // Now that we have the right storage, switch the semantics.
2511 semantics = &toSemantics;
2512
2513 // If this is an extension, perform the shift now that the storage is
2514 // available.
2515 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2516 APInt::tcShiftLeft(significandParts(), Words: newPartCount, Count: shift);
2517
2518 if (isFiniteNonZero()) {
2519 fs = normalize(rounding_mode, lost_fraction: lostFraction);
2520 *losesInfo = (fs != opOK);
2521 } else if (category == fcNaN) {
2522 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2523 *losesInfo =
2524 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2525 makeNaN(SNaN: false, Negative: sign);
2526 return is_signaling ? opInvalidOp : opOK;
2527 }
2528
2529 // If NaN is negative zero, we need to create a new NaN to avoid converting
2530 // NaN to -Inf.
2531 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2532 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2533 makeNaN(SNaN: false, Negative: false);
2534
2535 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2536
2537 // For x87 extended precision, we want to make a NaN, not a special NaN if
2538 // the input wasn't special either.
2539 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2540 APInt::tcSetBit(significandParts(), bit: semantics->precision - 1);
2541
2542 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2543 // This also guarantees that a sNaN does not become Inf on a truncation
2544 // that loses all payload bits.
2545 if (is_signaling) {
2546 makeQuiet();
2547 fs = opInvalidOp;
2548 } else {
2549 fs = opOK;
2550 }
2551 } else if (category == fcInfinity &&
2552 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2553 makeNaN(SNaN: false, Negative: sign);
2554 *losesInfo = true;
2555 fs = opInexact;
2556 } else if (category == fcZero &&
2557 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2558 // Negative zero loses info, but positive zero doesn't.
2559 *losesInfo =
2560 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2561 fs = *losesInfo ? opInexact : opOK;
2562 // NaN is negative zero means -0 -> +0, which can lose information
2563 sign = false;
2564 } else {
2565 *losesInfo = false;
2566 fs = opOK;
2567 }
2568
2569 if (category == fcZero && !semantics->hasZero)
2570 makeSmallestNormalized(Negative: false);
2571 return fs;
2572}
2573
2574/* Convert a floating point number to an integer according to the
2575 rounding mode. If the rounded integer value is out of range this
2576 returns an invalid operation exception and the contents of the
2577 destination parts are unspecified. If the rounded value is in
2578 range but the floating point number is not the exact integer, the C
2579 standard doesn't require an inexact exception to be raised. IEEE
2580 854 does require it so we do that.
2581
2582 Note that for conversions to integer type the C standard requires
2583 round-to-zero to always be used. */
2584APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2585 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2586 roundingMode rounding_mode, bool *isExact) const {
2587 *isExact = false;
2588
2589 /* Handle the three special cases first. */
2590 if (category == fcInfinity || category == fcNaN)
2591 return opInvalidOp;
2592
2593 unsigned dstPartsCount = partCountForBits(bits: width);
2594 assert(dstPartsCount <= parts.size() && "Integer too big");
2595
2596 if (category == fcZero) {
2597 APInt::tcSet(parts.data(), 0, dstPartsCount);
2598 // Negative zero can't be represented as an int.
2599 *isExact = !sign;
2600 return opOK;
2601 }
2602
2603 const integerPart *src = significandParts();
2604
2605 unsigned truncatedBits;
2606 /* Step 1: place our absolute value, with any fraction truncated, in
2607 the destination. */
2608 if (exponent < 0) {
2609 /* Our absolute value is less than one; truncate everything. */
2610 APInt::tcSet(parts.data(), 0, dstPartsCount);
2611 /* For exponent -1 the integer bit represents .5, look at that.
2612 For smaller exponents leftmost truncated bit is 0. */
2613 truncatedBits = semantics->precision -1U - exponent;
2614 } else {
2615 /* We want the most significant (exponent + 1) bits; the rest are
2616 truncated. */
2617 unsigned int bits = exponent + 1U;
2618
2619 /* Hopelessly large in magnitude? */
2620 if (bits > width)
2621 return opInvalidOp;
2622
2623 if (bits < semantics->precision) {
2624 /* We truncate (semantics->precision - bits) bits. */
2625 truncatedBits = semantics->precision - bits;
2626 APInt::tcExtract(parts.data(), dstCount: dstPartsCount, src, srcBits: bits, srcLSB: truncatedBits);
2627 } else {
2628 /* We want at least as many bits as are available. */
2629 APInt::tcExtract(parts.data(), dstCount: dstPartsCount, src, srcBits: semantics->precision,
2630 srcLSB: 0);
2631 APInt::tcShiftLeft(parts.data(), Words: dstPartsCount,
2632 Count: bits - semantics->precision);
2633 truncatedBits = 0;
2634 }
2635 }
2636
2637 /* Step 2: work out any lost fraction, and increment the absolute
2638 value if we would round away from zero. */
2639 lostFraction lost_fraction;
2640 if (truncatedBits) {
2641 lost_fraction = lostFractionThroughTruncation(parts: src, partCount: partCount(),
2642 bits: truncatedBits);
2643 if (lost_fraction != lfExactlyZero &&
2644 roundAwayFromZero(rounding_mode, lost_fraction, bit: truncatedBits)) {
2645 if (APInt::tcIncrement(dst: parts.data(), parts: dstPartsCount))
2646 return opInvalidOp; /* Overflow. */
2647 }
2648 } else {
2649 lost_fraction = lfExactlyZero;
2650 }
2651
2652 /* Step 3: check if we fit in the destination. */
2653 unsigned int omsb = APInt::tcMSB(parts: parts.data(), n: dstPartsCount) + 1;
2654
2655 if (sign) {
2656 if (!isSigned) {
2657 /* Negative numbers cannot be represented as unsigned. */
2658 if (omsb != 0)
2659 return opInvalidOp;
2660 } else {
2661 /* It takes omsb bits to represent the unsigned integer value.
2662 We lose a bit for the sign, but care is needed as the
2663 maximally negative integer is a special case. */
2664 if (omsb == width &&
2665 APInt::tcLSB(parts.data(), n: dstPartsCount) + 1 != omsb)
2666 return opInvalidOp;
2667
2668 /* This case can happen because of rounding. */
2669 if (omsb > width)
2670 return opInvalidOp;
2671 }
2672
2673 APInt::tcNegate (parts.data(), dstPartsCount);
2674 } else {
2675 if (omsb >= width + !isSigned)
2676 return opInvalidOp;
2677 }
2678
2679 if (lost_fraction == lfExactlyZero) {
2680 *isExact = true;
2681 return opOK;
2682 }
2683 return opInexact;
2684}
2685
2686/* Same as convertToSignExtendedInteger, except we provide
2687 deterministic values in case of an invalid operation exception,
2688 namely zero for NaNs and the minimal or maximal value respectively
2689 for underflow or overflow.
2690 The *isExact output tells whether the result is exact, in the sense
2691 that converting it back to the original floating point type produces
2692 the original value. This is almost equivalent to result==opOK,
2693 except for negative zeroes.
2694*/
2695APFloat::opStatus
2696IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2697 unsigned int width, bool isSigned,
2698 roundingMode rounding_mode, bool *isExact) const {
2699 opStatus fs = convertToSignExtendedInteger(parts, width, isSigned,
2700 rounding_mode, isExact);
2701
2702 if (fs == opInvalidOp) {
2703 unsigned int bits, dstPartsCount;
2704
2705 dstPartsCount = partCountForBits(bits: width);
2706 assert(dstPartsCount <= parts.size() && "Integer too big");
2707
2708 if (category == fcNaN)
2709 bits = 0;
2710 else if (sign)
2711 bits = isSigned;
2712 else
2713 bits = width - isSigned;
2714
2715 tcSetLeastSignificantBits(dst: parts.data(), parts: dstPartsCount, bits);
2716 if (sign && isSigned)
2717 APInt::tcShiftLeft(parts.data(), Words: dstPartsCount, Count: width - 1);
2718 }
2719
2720 return fs;
2721}
2722
2723/* Convert an unsigned integer SRC to a floating point number,
2724 rounding according to ROUNDING_MODE. The sign of the floating
2725 point number is not modified. */
2726APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2727 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2728 category = fcNormal;
2729 unsigned omsb = APInt::tcMSB(parts: src, n: srcCount) + 1;
2730 integerPart *dst = significandParts();
2731 unsigned dstCount = partCount();
2732 unsigned precision = semantics->precision;
2733
2734 /* We want the most significant PRECISION bits of SRC. There may not
2735 be that many; extract what we can. */
2736 lostFraction lost_fraction;
2737 if (precision <= omsb) {
2738 exponent = omsb - 1;
2739 lost_fraction = lostFractionThroughTruncation(parts: src, partCount: srcCount,
2740 bits: omsb - precision);
2741 APInt::tcExtract(dst, dstCount, src, srcBits: precision, srcLSB: omsb - precision);
2742 } else {
2743 exponent = precision - 1;
2744 lost_fraction = lfExactlyZero;
2745 APInt::tcExtract(dst, dstCount, src, srcBits: omsb, srcLSB: 0);
2746 }
2747
2748 return normalize(rounding_mode, lost_fraction);
2749}
2750
2751APFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2752 roundingMode rounding_mode) {
2753 unsigned int partCount = Val.getNumWords();
2754 APInt api = Val;
2755
2756 sign = false;
2757 if (isSigned && api.isNegative()) {
2758 sign = true;
2759 api = -api;
2760 }
2761
2762 return convertFromUnsignedParts(src: api.getRawData(), srcCount: partCount, rounding_mode);
2763}
2764
2765Expected<APFloat::opStatus>
2766IEEEFloat::convertFromHexadecimalString(StringRef s,
2767 roundingMode rounding_mode) {
2768 lostFraction lost_fraction = lfExactlyZero;
2769
2770 category = fcNormal;
2771 zeroSignificand();
2772 exponent = 0;
2773
2774 integerPart *significand = significandParts();
2775 unsigned partsCount = partCount();
2776 unsigned bitPos = partsCount * integerPartWidth;
2777 bool computedTrailingFraction = false;
2778
2779 // Skip leading zeroes and any (hexa)decimal point.
2780 StringRef::iterator begin = s.begin();
2781 StringRef::iterator end = s.end();
2782 StringRef::iterator dot;
2783 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, dot: &dot);
2784 if (!PtrOrErr)
2785 return PtrOrErr.takeError();
2786 StringRef::iterator p = *PtrOrErr;
2787 StringRef::iterator firstSignificantDigit = p;
2788
2789 while (p != end) {
2790 integerPart hex_value;
2791
2792 if (*p == '.') {
2793 if (dot != end)
2794 return createError(Err: "String contains multiple dots");
2795 dot = p++;
2796 continue;
2797 }
2798
2799 hex_value = hexDigitValue(C: *p);
2800 if (hex_value == UINT_MAX)
2801 break;
2802
2803 p++;
2804
2805 // Store the number while we have space.
2806 if (bitPos) {
2807 bitPos -= 4;
2808 hex_value <<= bitPos % integerPartWidth;
2809 significand[bitPos / integerPartWidth] |= hex_value;
2810 } else if (!computedTrailingFraction) {
2811 auto FractOrErr = trailingHexadecimalFraction(p, end, digitValue: hex_value);
2812 if (!FractOrErr)
2813 return FractOrErr.takeError();
2814 lost_fraction = *FractOrErr;
2815 computedTrailingFraction = true;
2816 }
2817 }
2818
2819 /* Hex floats require an exponent but not a hexadecimal point. */
2820 if (p == end)
2821 return createError(Err: "Hex strings require an exponent");
2822 if (*p != 'p' && *p != 'P')
2823 return createError(Err: "Invalid character in significand");
2824 if (p == begin)
2825 return createError(Err: "Significand has no digits");
2826 if (dot != end && p - begin == 1)
2827 return createError(Err: "Significand has no digits");
2828
2829 /* Ignore the exponent if we are zero. */
2830 if (p != firstSignificantDigit) {
2831 int expAdjustment;
2832
2833 /* Implicit hexadecimal point? */
2834 if (dot == end)
2835 dot = p;
2836
2837 /* Calculate the exponent adjustment implicit in the number of
2838 significant digits. */
2839 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2840 if (expAdjustment < 0)
2841 expAdjustment++;
2842 expAdjustment = expAdjustment * 4 - 1;
2843
2844 /* Adjust for writing the significand starting at the most
2845 significant nibble. */
2846 expAdjustment += semantics->precision;
2847 expAdjustment -= partsCount * integerPartWidth;
2848
2849 /* Adjust for the given exponent. */
2850 auto ExpOrErr = totalExponent(p: p + 1, end, exponentAdjustment: expAdjustment);
2851 if (!ExpOrErr)
2852 return ExpOrErr.takeError();
2853 exponent = *ExpOrErr;
2854 }
2855
2856 return normalize(rounding_mode, lost_fraction);
2857}
2858
2859APFloat::opStatus
2860IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2861 unsigned sigPartCount, int exp,
2862 roundingMode rounding_mode) {
2863 fltSemantics calcSemantics = { .maxExponent: 32767, .minExponent: -32767, .precision: 0, .sizeInBits: 0 };
2864 integerPart pow5Parts[maxPowerOfFiveParts];
2865
2866 bool isNearest = rounding_mode == rmNearestTiesToEven ||
2867 rounding_mode == rmNearestTiesToAway;
2868
2869 unsigned parts = partCountForBits(bits: semantics->precision + 11);
2870
2871 /* Calculate pow(5, abs(exp)). */
2872 unsigned pow5PartCount = powerOf5(dst: pow5Parts, power: exp >= 0 ? exp : -exp);
2873
2874 for (;; parts *= 2) {
2875 unsigned int excessPrecision, truncatedBits;
2876
2877 calcSemantics.precision = parts * integerPartWidth - 1;
2878 excessPrecision = calcSemantics.precision - semantics->precision;
2879 truncatedBits = excessPrecision;
2880
2881 IEEEFloat decSig(calcSemantics, uninitialized);
2882 decSig.makeZero(Neg: sign);
2883 IEEEFloat pow5(calcSemantics);
2884
2885 opStatus sigStatus = decSig.convertFromUnsignedParts(
2886 src: decSigParts, srcCount: sigPartCount, rounding_mode: rmNearestTiesToEven);
2887 opStatus powStatus = pow5.convertFromUnsignedParts(src: pow5Parts, srcCount: pow5PartCount,
2888 rounding_mode: rmNearestTiesToEven);
2889 /* Add exp, as 10^n = 5^n * 2^n. */
2890 decSig.exponent += exp;
2891
2892 lostFraction calcLostFraction;
2893 integerPart HUerr, HUdistance;
2894 unsigned int powHUerr;
2895
2896 if (exp >= 0) {
2897 /* multiplySignificand leaves the precision-th bit set to 1. */
2898 calcLostFraction = decSig.multiplySignificand(rhs: pow5);
2899 powHUerr = powStatus != opOK;
2900 } else {
2901 calcLostFraction = decSig.divideSignificand(rhs: pow5);
2902 /* Denormal numbers have less precision. */
2903 if (decSig.exponent < semantics->minExponent) {
2904 excessPrecision += (semantics->minExponent - decSig.exponent);
2905 truncatedBits = excessPrecision;
2906 excessPrecision = std::min(a: excessPrecision, b: calcSemantics.precision);
2907 }
2908 /* Extra half-ulp lost in reciprocal of exponent. */
2909 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2910 }
2911
2912 /* Both multiplySignificand and divideSignificand return the
2913 result with the integer bit set. */
2914 assert(APInt::tcExtractBit
2915 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2916
2917 HUerr = HUerrBound(inexactMultiply: calcLostFraction != lfExactlyZero, HUerr1: sigStatus != opOK,
2918 HUerr2: powHUerr);
2919 HUdistance = 2 * ulpsFromBoundary(parts: decSig.significandParts(),
2920 bits: excessPrecision, isNearest);
2921
2922 /* Are we guaranteed to round correctly if we truncate? */
2923 if (HUdistance >= HUerr) {
2924 APInt::tcExtract(significandParts(), dstCount: partCount(), decSig.significandParts(),
2925 srcBits: calcSemantics.precision - excessPrecision,
2926 srcLSB: excessPrecision);
2927 /* Take the exponent of decSig. If we tcExtract-ed less bits
2928 above we must adjust our exponent to compensate for the
2929 implicit right shift. */
2930 exponent = (decSig.exponent + semantics->precision
2931 - (calcSemantics.precision - excessPrecision));
2932 calcLostFraction = lostFractionThroughTruncation(parts: decSig.significandParts(),
2933 partCount: decSig.partCount(),
2934 bits: truncatedBits);
2935 return static_cast<opStatus>(normalize(rounding_mode, lost_fraction: calcLostFraction) |
2936 ((sigStatus | powStatus) & opInexact));
2937 }
2938 }
2939}
2940
2941Expected<APFloat::opStatus>
2942IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2943 decimalInfo D;
2944 opStatus fs;
2945
2946 /* Scan the text. */
2947 StringRef::iterator p = str.begin();
2948 if (Error Err = interpretDecimal(begin: p, end: str.end(), D: &D))
2949 return std::move(Err);
2950
2951 /* Handle the quick cases. First the case of no significant digits,
2952 i.e. zero, and then exponents that are obviously too large or too
2953 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2954 definitely overflows if
2955
2956 (exp - 1) * L >= maxExponent
2957
2958 and definitely underflows to zero where
2959
2960 (exp + 1) * L <= minExponent - precision
2961
2962 With integer arithmetic the tightest bounds for L are
2963
2964 93/28 < L < 196/59 [ numerator <= 256 ]
2965 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2966 */
2967
2968 // Test if we have a zero number allowing for strings with no null terminators
2969 // and zero decimals with non-zero exponents.
2970 //
2971 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2972 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2973 // be at most one dot. On the other hand, if we have a zero with a non-zero
2974 // exponent, then we know that D.firstSigDigit will be non-numeric.
2975 if (D.firstSigDigit == str.end() || decDigitValue(c: *D.firstSigDigit) >= 10U) {
2976 category = fcZero;
2977 fs = opOK;
2978 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2979 sign = false;
2980 if (!semantics->hasZero)
2981 makeSmallestNormalized(Negative: false);
2982
2983 /* Check whether the normalized exponent is high enough to overflow
2984 max during the log-rebasing in the max-exponent check below. */
2985 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2986 fs = handleOverflow(rounding_mode);
2987
2988 /* If it wasn't, then it also wasn't high enough to overflow max
2989 during the log-rebasing in the min-exponent check. Check that it
2990 won't overflow min in either check, then perform the min-exponent
2991 check. */
2992 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2993 (D.normalizedExponent + 1) * 28738 <=
2994 8651 * (semantics->minExponent - (int) semantics->precision)) {
2995 /* Underflow to zero and round. */
2996 category = fcNormal;
2997 zeroSignificand();
2998 fs = normalize(rounding_mode, lost_fraction: lfLessThanHalf);
2999
3000 /* We can finally safely perform the max-exponent check. */
3001 } else if ((D.normalizedExponent - 1) * 42039
3002 >= 12655 * semantics->maxExponent) {
3003 /* Overflow and round. */
3004 fs = handleOverflow(rounding_mode);
3005 } else {
3006 integerPart *decSignificand;
3007 unsigned int partCount;
3008
3009 /* A tight upper bound on number of bits required to hold an
3010 N-digit decimal integer is N * 196 / 59. Allocate enough space
3011 to hold the full significand, and an extra part required by
3012 tcMultiplyPart. */
3013 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3014 partCount = partCountForBits(bits: 1 + 196 * partCount / 59);
3015 decSignificand = new integerPart[partCount + 1];
3016 partCount = 0;
3017
3018 /* Convert to binary efficiently - we do almost all multiplication
3019 in an integerPart. When this would overflow do we do a single
3020 bignum multiplication, and then revert again to multiplication
3021 in an integerPart. */
3022 do {
3023 integerPart decValue, val, multiplier;
3024
3025 val = 0;
3026 multiplier = 1;
3027
3028 do {
3029 if (*p == '.') {
3030 p++;
3031 if (p == str.end()) {
3032 break;
3033 }
3034 }
3035 decValue = decDigitValue(c: *p++);
3036 if (decValue >= 10U) {
3037 delete[] decSignificand;
3038 return createError(Err: "Invalid character in significand");
3039 }
3040 multiplier *= 10;
3041 val = val * 10 + decValue;
3042 /* The maximum number that can be multiplied by ten with any
3043 digit added without overflowing an integerPart. */
3044 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3045
3046 /* Multiply out the current part. */
3047 APInt::tcMultiplyPart(dst: decSignificand, src: decSignificand, multiplier, carry: val,
3048 srcParts: partCount, dstParts: partCount + 1, add: false);
3049
3050 /* If we used another part (likely but not guaranteed), increase
3051 the count. */
3052 if (decSignificand[partCount])
3053 partCount++;
3054 } while (p <= D.lastSigDigit);
3055
3056 category = fcNormal;
3057 fs = roundSignificandWithExponent(decSigParts: decSignificand, sigPartCount: partCount,
3058 exp: D.exponent, rounding_mode);
3059
3060 delete [] decSignificand;
3061 }
3062
3063 return fs;
3064}
3065
3066bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3067 const size_t MIN_NAME_SIZE = 3;
3068
3069 if (str.size() < MIN_NAME_SIZE)
3070 return false;
3071
3072 if (str == "inf" || str == "INFINITY" || str == "+Inf" || str == "+inf") {
3073 makeInf(Neg: false);
3074 return true;
3075 }
3076
3077 bool IsNegative = str.consume_front(Prefix: "-");
3078 if (IsNegative) {
3079 if (str.size() < MIN_NAME_SIZE)
3080 return false;
3081
3082 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3083 makeInf(Neg: true);
3084 return true;
3085 }
3086 }
3087
3088 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3089 bool IsSignaling = str.consume_front_insensitive(Prefix: "s");
3090 if (IsSignaling) {
3091 if (str.size() < MIN_NAME_SIZE)
3092 return false;
3093 }
3094
3095 if (str.consume_front(Prefix: "nan") || str.consume_front(Prefix: "NaN")) {
3096 // A NaN without payload.
3097 if (str.empty()) {
3098 makeNaN(SNaN: IsSignaling, Negative: IsNegative);
3099 return true;
3100 }
3101
3102 // Allow the payload to be inside parentheses.
3103 if (str.front() == '(') {
3104 // Parentheses should be balanced (and not empty).
3105 if (str.size() <= 2 || str.back() != ')')
3106 return false;
3107
3108 str = str.slice(Start: 1, End: str.size() - 1);
3109 }
3110
3111 // Determine the payload number's radix.
3112 unsigned Radix = 10;
3113 if (str[0] == '0') {
3114 if (str.size() > 1 && tolower(c: str[1]) == 'x') {
3115 str = str.drop_front(N: 2);
3116 Radix = 16;
3117 } else {
3118 Radix = 8;
3119 }
3120 }
3121
3122 // Parse the payload and make the NaN.
3123 APInt Payload;
3124 if (!str.getAsInteger(Radix, Result&: Payload)) {
3125 makeNaN(SNaN: IsSignaling, Negative: IsNegative, fill: &Payload);
3126 return true;
3127 }
3128 }
3129
3130 return false;
3131}
3132
3133Expected<APFloat::opStatus>
3134IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3135 if (str.empty())
3136 return createError(Err: "Invalid string length");
3137
3138 // Handle special cases.
3139 if (convertFromStringSpecials(str))
3140 return opOK;
3141
3142 /* Handle a leading minus sign. */
3143 StringRef::iterator p = str.begin();
3144 size_t slen = str.size();
3145 sign = *p == '-' ? 1 : 0;
3146 if (sign && !semantics->hasSignedRepr)
3147 llvm_unreachable(
3148 "This floating point format does not support signed values");
3149
3150 if (*p == '-' || *p == '+') {
3151 p++;
3152 slen--;
3153 if (!slen)
3154 return createError(Err: "String has no digits");
3155 }
3156
3157 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3158 if (slen == 2)
3159 return createError(Err: "Invalid string");
3160 return convertFromHexadecimalString(s: StringRef(p + 2, slen - 2),
3161 rounding_mode);
3162 }
3163
3164 return convertFromDecimalString(str: StringRef(p, slen), rounding_mode);
3165}
3166
3167/* Write out a hexadecimal representation of the floating point value
3168 to DST, which must be of sufficient size, in the C99 form
3169 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3170 excluding the terminating NUL.
3171
3172 If UPPERCASE, the output is in upper case, otherwise in lower case.
3173
3174 HEXDIGITS digits appear altogether, rounding the value if
3175 necessary. If HEXDIGITS is 0, the minimal precision to display the
3176 number precisely is used instead. If nothing would appear after
3177 the decimal point it is suppressed.
3178
3179 The decimal exponent is always printed and has at least one digit.
3180 Zero values display an exponent of zero. Infinities and NaNs
3181 appear as "infinity" or "nan" respectively.
3182
3183 The above rules are as specified by C99. There is ambiguity about
3184 what the leading hexadecimal digit should be. This implementation
3185 uses whatever is necessary so that the exponent is displayed as
3186 stored. This implies the exponent will fall within the IEEE format
3187 range, and the leading hexadecimal digit will be 0 (for denormals),
3188 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3189 any other digits zero).
3190*/
3191unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3192 bool upperCase,
3193 roundingMode rounding_mode) const {
3194 char *p = dst;
3195 if (sign)
3196 *dst++ = '-';
3197
3198 switch (category) {
3199 case fcInfinity:
3200 memcpy (dest: dst, src: upperCase ? infinityU: infinityL, n: sizeof infinityU - 1);
3201 dst += sizeof infinityL - 1;
3202 break;
3203
3204 case fcNaN:
3205 memcpy (dest: dst, src: upperCase ? NaNU: NaNL, n: sizeof NaNU - 1);
3206 dst += sizeof NaNU - 1;
3207 break;
3208
3209 case fcZero:
3210 *dst++ = '0';
3211 *dst++ = upperCase ? 'X': 'x';
3212 *dst++ = '0';
3213 if (hexDigits > 1) {
3214 *dst++ = '.';
3215 memset (s: dst, c: '0', n: hexDigits - 1);
3216 dst += hexDigits - 1;
3217 }
3218 *dst++ = upperCase ? 'P': 'p';
3219 *dst++ = '0';
3220 break;
3221
3222 case fcNormal:
3223 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3224 break;
3225 }
3226
3227 *dst = 0;
3228
3229 return static_cast<unsigned int>(dst - p);
3230}
3231
3232/* Does the hard work of outputting the correctly rounded hexadecimal
3233 form of a normal floating point number with the specified number of
3234 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3235 digits necessary to print the value precisely is output. */
3236char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3237 bool upperCase,
3238 roundingMode rounding_mode) const {
3239 *dst++ = '0';
3240 *dst++ = upperCase ? 'X': 'x';
3241
3242 bool roundUp = false;
3243 const char *hexDigitChars = upperCase ? hexDigitsUpper : hexDigitsLower;
3244
3245 const integerPart *significand = significandParts();
3246 unsigned partsCount = partCount();
3247
3248 /* +3 because the first digit only uses the single integer bit, so
3249 we have 3 virtual zero most-significant-bits. */
3250 unsigned valueBits = semantics->precision + 3;
3251 unsigned shift = integerPartWidth - valueBits % integerPartWidth;
3252
3253 /* The natural number of digits required ignoring trailing
3254 insignificant zeroes. */
3255 unsigned outputDigits = (valueBits - significandLSB() + 3) / 4;
3256
3257 /* hexDigits of zero means use the required number for the
3258 precision. Otherwise, see if we are truncating. If we are,
3259 find out if we need to round away from zero. */
3260 if (hexDigits) {
3261 if (hexDigits < outputDigits) {
3262 /* We are dropping non-zero bits, so need to check how to round.
3263 "bits" is the number of dropped bits. */
3264 unsigned int bits;
3265 lostFraction fraction;
3266
3267 bits = valueBits - hexDigits * 4;
3268 fraction = lostFractionThroughTruncation (parts: significand, partCount: partsCount, bits);
3269 roundUp = roundAwayFromZero(rounding_mode, lost_fraction: fraction, bit: bits);
3270 }
3271 outputDigits = hexDigits;
3272 }
3273
3274 /* Write the digits consecutively, and start writing in the location
3275 of the hexadecimal point. We move the most significant digit
3276 left and add the hexadecimal point later. */
3277 char *p = ++dst;
3278
3279 unsigned count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3280
3281 while (outputDigits && count) {
3282 integerPart part;
3283
3284 /* Put the most significant integerPartWidth bits in "part". */
3285 if (--count == partsCount)
3286 part = 0; /* An imaginary higher zero part. */
3287 else
3288 part = significand[count] << shift;
3289
3290 if (count && shift)
3291 part |= significand[count - 1] >> (integerPartWidth - shift);
3292
3293 /* Convert as much of "part" to hexdigits as we can. */
3294 unsigned int curDigits = integerPartWidth / 4;
3295
3296 curDigits = std::min(a: curDigits, b: outputDigits);
3297 dst += partAsHex (dst, part, count: curDigits, hexDigitChars);
3298 outputDigits -= curDigits;
3299 }
3300
3301 if (roundUp) {
3302 char *q = dst;
3303
3304 /* Note that hexDigitChars has a trailing '0'. */
3305 do {
3306 q--;
3307 *q = hexDigitChars[hexDigitValue (C: *q) + 1];
3308 } while (*q == '0');
3309 assert(q >= p);
3310 } else {
3311 /* Add trailing zeroes. */
3312 memset (s: dst, c: '0', n: outputDigits);
3313 dst += outputDigits;
3314 }
3315
3316 /* Move the most significant digit to before the point, and if there
3317 is something after the decimal point add it. This must come
3318 after rounding above. */
3319 p[-1] = p[0];
3320 if (dst -1 == p)
3321 dst--;
3322 else
3323 p[0] = '.';
3324
3325 /* Finally output the exponent. */
3326 *dst++ = upperCase ? 'P': 'p';
3327
3328 return writeSignedDecimal (dst, value: exponent);
3329}
3330
3331hash_code hash_value(const IEEEFloat &Arg) {
3332 if (!Arg.isFiniteNonZero())
3333 return hash_combine(args: (uint8_t)Arg.category,
3334 // NaN has no sign, fix it at zero.
3335 args: Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3336 args: Arg.semantics->precision);
3337
3338 // Normal floats need their exponent and significand hashed.
3339 return hash_combine(args: (uint8_t)Arg.category, args: (uint8_t)Arg.sign,
3340 args: Arg.semantics->precision, args: Arg.exponent,
3341 args: hash_combine_range(
3342 first: Arg.significandParts(),
3343 last: Arg.significandParts() + Arg.partCount()));
3344}
3345
3346// Conversion from APFloat to/from host float/double. It may eventually be
3347// possible to eliminate these and have everybody deal with APFloats, but that
3348// will take a while. This approach will not easily extend to long double.
3349// Current implementation requires integerPartWidth==64, which is correct at
3350// the moment but could be made more general.
3351
3352// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3353// the actual IEEE respresentations. We compensate for that here.
3354
3355APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3356 assert(semantics ==
3357 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3358 assert(partCount()==2);
3359
3360 uint64_t myexponent, mysignificand;
3361
3362 if (isFiniteNonZero()) {
3363 myexponent = exponent+16383; //bias
3364 mysignificand = significandParts()[0];
3365 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3366 myexponent = 0; // denormal
3367 } else if (category==fcZero) {
3368 myexponent = 0;
3369 mysignificand = 0;
3370 } else if (category==fcInfinity) {
3371 myexponent = 0x7fff;
3372 mysignificand = 0x8000000000000000ULL;
3373 } else {
3374 assert(category == fcNaN && "Unknown category");
3375 myexponent = 0x7fff;
3376 mysignificand = significandParts()[0];
3377 }
3378
3379 uint64_t words[2];
3380 words[0] = mysignificand;
3381 words[1] = ((uint64_t)(sign & 1) << 15) |
3382 (myexponent & 0x7fffLL);
3383 return APInt(80, words);
3384}
3385
3386APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3387 assert(semantics ==
3388 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3389 assert(partCount()==2);
3390
3391 uint64_t words[2];
3392 bool losesInfo;
3393
3394 // Convert number to double. To avoid spurious underflows, we re-
3395 // normalize against the "double" minExponent first, and only *then*
3396 // truncate the mantissa. The result of that second conversion
3397 // may be inexact, but should never underflow.
3398 // Declare fltSemantics before APFloat that uses it (and
3399 // saves pointer to it) to ensure correct destruction order.
3400 fltSemantics extendedSemantics = *semantics;
3401 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3402 IEEEFloat extended(*this);
3403 [[maybe_unused]] opStatus fs =
3404 extended.convert(toSemantics: extendedSemantics, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3405 assert(fs == opOK && !losesInfo);
3406
3407 IEEEFloat u(extended);
3408 fs = u.convert(toSemantics: APFloatBase::semIEEEdouble, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3409 assert(fs == opOK || fs == opInexact);
3410 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3411
3412 // If conversion was exact or resulted in a special case, we're done;
3413 // just set the second double to zero. Otherwise, re-convert back to
3414 // the extended format and compute the difference. This now should
3415 // convert exactly to double.
3416 if (u.isFiniteNonZero() && losesInfo) {
3417 fs = u.convert(toSemantics: extendedSemantics, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3418 assert(fs == opOK && !losesInfo);
3419
3420 IEEEFloat v(extended);
3421 v.subtract(rhs: u, rounding_mode: rmNearestTiesToEven);
3422 fs = v.convert(toSemantics: APFloatBase::semIEEEdouble, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3423 assert(fs == opOK && !losesInfo);
3424 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3425 } else {
3426 words[1] = 0;
3427 }
3428
3429 return APInt(128, words);
3430}
3431
3432template <const fltSemantics &S>
3433APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3434 assert(semantics == &S);
3435 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3436 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3437 constexpr integerPart integer_bit =
3438 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3439 constexpr uint64_t significand_mask = integer_bit - 1;
3440 constexpr unsigned int exponent_bits =
3441 S.sizeInBits - (S.hasSignedRepr ? 1 : 0) - trailing_significand_bits;
3442 static_assert(exponent_bits < 64);
3443 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3444 constexpr bool is_zero_exp_reserved = S.hasDenormals || S.hasZero;
3445 constexpr int bias = -(S.minExponent - (is_zero_exp_reserved ? 1 : 0));
3446
3447 uint64_t myexponent;
3448 std::array<integerPart, partCountForBits(bits: trailing_significand_bits)>
3449 mysignificand;
3450
3451 if (isFiniteNonZero()) {
3452 myexponent = exponent + bias;
3453 std::copy_n(significandParts(), mysignificand.size(),
3454 mysignificand.begin());
3455 if (myexponent == 1 &&
3456 !(significandParts()[integer_bit_part] & integer_bit))
3457 myexponent = 0; // denormal
3458 } else if (category == fcZero) {
3459 if (!S.hasZero)
3460 llvm_unreachable("semantics does not support zero!");
3461 myexponent = ::exponentZero(semantics: S) + bias;
3462 mysignificand.fill(0);
3463 } else if (category == fcInfinity) {
3464 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3465 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3466 llvm_unreachable("semantics don't support inf!");
3467 myexponent = ::exponentInf(semantics: S) + bias;
3468 mysignificand.fill(0);
3469 } else {
3470 assert(category == fcNaN && "Unknown category!");
3471 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3472 llvm_unreachable("semantics don't support NaN!");
3473 myexponent = ::exponentNaN(semantics: S) + bias;
3474 std::copy_n(significandParts(), mysignificand.size(),
3475 mysignificand.begin());
3476 }
3477 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3478 auto words_iter =
3479 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3480 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3481 // Clear the integer bit.
3482 words[mysignificand.size() - 1] &= significand_mask;
3483 }
3484 std::fill(words_iter, words.end(), uint64_t{0});
3485 constexpr size_t last_word = words.size() - 1;
3486 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3487 << ((S.sizeInBits - 1) % 64);
3488 words[last_word] |= shifted_sign;
3489 uint64_t shifted_exponent = (myexponent & exponent_mask)
3490 << (trailing_significand_bits % 64);
3491 words[last_word] |= shifted_exponent;
3492 if constexpr (last_word == 0) {
3493 return APInt(S.sizeInBits, words[0]);
3494 }
3495 return APInt(S.sizeInBits, words);
3496}
3497
3498APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3499 assert(partCount() == 2);
3500 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3501}
3502
3503APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3504 assert(partCount()==1);
3505 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3506}
3507
3508APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3509 assert(partCount()==1);
3510 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3511}
3512
3513APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3514 assert(partCount() == 1);
3515 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3516}
3517
3518APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3519 assert(partCount()==1);
3520 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3521}
3522
3523APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3524 assert(partCount() == 1);
3525 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3526}
3527
3528APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3529 assert(partCount() == 1);
3530 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3531}
3532
3533APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3534 assert(partCount() == 1);
3535 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3536}
3537
3538APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3539 assert(partCount() == 1);
3540 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3541}
3542
3543APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3544 assert(partCount() == 1);
3545 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3546}
3547
3548APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3549 assert(partCount() == 1);
3550 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3551}
3552
3553APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3554 assert(partCount() == 1);
3555 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3556}
3557
3558APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3559 assert(partCount() == 1);
3560 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3561}
3562
3563APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3564 assert(partCount() == 1);
3565 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3566}
3567
3568APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3569 assert(partCount() == 1);
3570 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3571}
3572
3573APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3574 assert(partCount() == 1);
3575 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3576}
3577
3578APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3579 assert(partCount() == 1);
3580 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3581}
3582
3583// This function creates an APInt that is just a bit map of the floating
3584// point constant as it would appear in memory. It is not a conversion,
3585// and treating the result as a normal integer is unlikely to be useful.
3586
3587APInt IEEEFloat::bitcastToAPInt() const {
3588 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3589 return convertHalfAPFloatToAPInt();
3590
3591 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3592 return convertBFloatAPFloatToAPInt();
3593
3594 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3595 return convertFloatAPFloatToAPInt();
3596
3597 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3598 return convertDoubleAPFloatToAPInt();
3599
3600 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3601 return convertQuadrupleAPFloatToAPInt();
3602
3603 if (semantics ==
3604 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3605 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3606
3607 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3608 return convertFloat8E5M2APFloatToAPInt();
3609
3610 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3611 return convertFloat8E5M2FNUZAPFloatToAPInt();
3612
3613 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3614 return convertFloat8E4M3APFloatToAPInt();
3615
3616 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3617 return convertFloat8E4M3FNAPFloatToAPInt();
3618
3619 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3620 return convertFloat8E4M3FNUZAPFloatToAPInt();
3621
3622 if (semantics ==
3623 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3624 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3625
3626 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3627 return convertFloat8E3M4APFloatToAPInt();
3628
3629 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3630 return convertFloatTF32APFloatToAPInt();
3631
3632 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3633 return convertFloat8E8M0FNUAPFloatToAPInt();
3634
3635 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3636 return convertFloat6E3M2FNAPFloatToAPInt();
3637
3638 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3639 return convertFloat6E2M3FNAPFloatToAPInt();
3640
3641 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3642 return convertFloat4E2M1FNAPFloatToAPInt();
3643
3644 assert(semantics ==
3645 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3646 "unknown format!");
3647 return convertF80LongDoubleAPFloatToAPInt();
3648}
3649
3650float IEEEFloat::convertToFloat() const {
3651 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3652 "Float semantics are not IEEEsingle");
3653 APInt api = bitcastToAPInt();
3654 return api.bitsToFloat();
3655}
3656
3657double IEEEFloat::convertToDouble() const {
3658 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3659 "Float semantics are not IEEEdouble");
3660 APInt api = bitcastToAPInt();
3661 return api.bitsToDouble();
3662}
3663
3664#ifdef HAS_IEE754_FLOAT128
3665float128 IEEEFloat::convertToQuad() const {
3666 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
3667 "Float semantics are not IEEEquads");
3668 APInt api = bitcastToAPInt();
3669 return api.bitsToQuad();
3670}
3671#endif
3672
3673/// Integer bit is explicit in this format. Intel hardware (387 and later)
3674/// does not support these bit patterns:
3675/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3676/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3677/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3678/// exponent = 0, integer bit 1 ("pseudodenormal")
3679/// At the moment, the first three are treated as NaNs, the last one as Normal.
3680void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3681 uint64_t i1 = api.getRawData()[0];
3682 uint64_t i2 = api.getRawData()[1];
3683 uint64_t myexponent = (i2 & 0x7fff);
3684 uint64_t mysignificand = i1;
3685 uint8_t myintegerbit = mysignificand >> 63;
3686
3687 initialize(ourSemantics: &APFloatBase::semX87DoubleExtended);
3688 assert(partCount()==2);
3689
3690 sign = static_cast<unsigned int>(i2>>15);
3691 if (myexponent == 0 && mysignificand == 0) {
3692 makeZero(Neg: sign);
3693 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3694 makeInf(Neg: sign);
3695 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3696 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3697 category = fcNaN;
3698 exponent = exponentNaN();
3699 significandParts()[0] = mysignificand;
3700 significandParts()[1] = 0;
3701 } else {
3702 category = fcNormal;
3703 exponent = myexponent - 16383;
3704 significandParts()[0] = mysignificand;
3705 significandParts()[1] = 0;
3706 if (myexponent==0) // denormal
3707 exponent = -16382;
3708 }
3709}
3710
3711void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3712 uint64_t i1 = api.getRawData()[0];
3713 uint64_t i2 = api.getRawData()[1];
3714 bool losesInfo;
3715
3716 // Get the first double and convert to our format.
3717 initFromDoubleAPInt(api: APInt(64, i1));
3718 [[maybe_unused]] opStatus fs = convert(toSemantics: APFloatBase::semPPCDoubleDoubleLegacy,
3719 rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3720 // (convert may return opInvalidOp if i1 is an sNaN).
3721 assert((fs == opOK || fs == opInvalidOp) && !losesInfo);
3722
3723 // Unless we have a special case, add in second double.
3724 if (isFiniteNonZero()) {
3725 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3726 fs = v.convert(toSemantics: APFloatBase::semPPCDoubleDoubleLegacy, rounding_mode: rmNearestTiesToEven,
3727 losesInfo: &losesInfo);
3728 assert(fs == opOK && !losesInfo);
3729
3730 add(rhs: v, rounding_mode: rmNearestTiesToEven);
3731 }
3732}
3733
3734// The E8M0 format has the following characteristics:
3735// It is an 8-bit unsigned format with only exponents (no actual significand).
3736// No encodings for {zero, infinities or denorms}.
3737// NaN is represented by all 1's.
3738// Bias is 127.
3739void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3740 initFromIEEEAPInt<APFloatBase::semFloat8E8M0FNU>(api);
3741}
3742
3743template <const fltSemantics &S>
3744void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3745 assert(api.getBitWidth() == S.sizeInBits);
3746
3747 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3748 constexpr integerPart integer_bit =
3749 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3750 constexpr uint64_t significand_mask = integer_bit - 1;
3751 constexpr unsigned int exponent_bits =
3752 S.sizeInBits - (S.hasSignedRepr ? 1 : 0) - trailing_significand_bits;
3753 constexpr unsigned int stored_significand_parts =
3754 partCountForBits(bits: trailing_significand_bits);
3755 static_assert(exponent_bits < 64);
3756 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3757 constexpr bool is_zero_exp_reserved = S.hasDenormals || S.hasZero;
3758 constexpr int bias = -(S.minExponent - (is_zero_exp_reserved ? 1 : 0));
3759 constexpr bool has_significand = trailing_significand_bits > 0;
3760
3761 // Copy the bits of the significand. We need to clear out the exponent and
3762 // sign bit in the last word.
3763 std::array<integerPart, stored_significand_parts> mysignificand;
3764 if constexpr (has_significand) {
3765 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3766 if constexpr (significand_mask != 0) {
3767 mysignificand[mysignificand.size() - 1] &= significand_mask;
3768 }
3769 } else {
3770 std::fill_n(mysignificand.begin(), mysignificand.size(), 0);
3771 // Always set integer bit to 1 for consistency in APFloat's internal
3772 // representation.
3773 mysignificand[0] = 1;
3774 }
3775
3776 // We assume the last word holds the sign bit, the exponent, and potentially
3777 // some of the trailing significand field.
3778 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3779 uint64_t myexponent =
3780 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3781
3782 initialize(ourSemantics: &S);
3783 assert(partCount() == mysignificand.size());
3784
3785 sign = S.hasSignedRepr
3786 ? static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64))
3787 : 0;
3788
3789 bool all_zero_significand =
3790 has_significand && llvm::all_of(mysignificand, equal_to(Arg: 0));
3791
3792 bool is_zero = myexponent == 0 && all_zero_significand && S.hasZero;
3793
3794 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3795 if (myexponent - bias == ::exponentInf(semantics: S) && all_zero_significand) {
3796 makeInf(Neg: sign);
3797 return;
3798 }
3799 }
3800
3801 bool is_nan = false;
3802
3803 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3804 is_nan = myexponent - bias == ::exponentNaN(semantics: S) && !all_zero_significand;
3805 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3806 bool all_ones_significand =
3807 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3808 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3809 (!significand_mask ||
3810 mysignificand[mysignificand.size() - 1] == significand_mask);
3811 is_nan = myexponent - bias == ::exponentNaN(semantics: S) && all_ones_significand;
3812 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3813 is_nan = is_zero && sign;
3814 }
3815
3816 if (is_nan) {
3817 category = fcNaN;
3818 exponent = ::exponentNaN(semantics: S);
3819 std::copy_n(mysignificand.begin(), mysignificand.size(),
3820 significandParts());
3821 return;
3822 }
3823
3824 if (is_zero) {
3825 makeZero(Neg: sign);
3826 return;
3827 }
3828
3829 category = fcNormal;
3830 exponent = myexponent - bias;
3831 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3832 if (myexponent == 0 && S.hasDenormals) // denormal
3833 exponent = S.minExponent;
3834 else
3835 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3836}
3837
3838void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3839 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
3840}
3841
3842void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3843 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
3844}
3845
3846void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3847 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
3848}
3849
3850void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3851 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
3852}
3853
3854void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3855 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
3856}
3857
3858void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3859 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
3860}
3861
3862void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3863 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
3864}
3865
3866void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3867 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
3868}
3869
3870void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3871 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
3872}
3873
3874void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3875 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
3876}
3877
3878void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3879 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
3880}
3881
3882void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
3883 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
3884}
3885
3886void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3887 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
3888}
3889
3890void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3891 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
3892}
3893
3894void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3895 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
3896}
3897
3898void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3899 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
3900}
3901
3902/// Treat api as containing the bits of a floating point number.
3903void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3904 assert(api.getBitWidth() == Sem->sizeInBits);
3905 if (Sem == &APFloatBase::semIEEEhalf)
3906 return initFromHalfAPInt(api);
3907 if (Sem == &APFloatBase::semBFloat)
3908 return initFromBFloatAPInt(api);
3909 if (Sem == &APFloatBase::semIEEEsingle)
3910 return initFromFloatAPInt(api);
3911 if (Sem == &APFloatBase::semIEEEdouble)
3912 return initFromDoubleAPInt(api);
3913 if (Sem == &APFloatBase::semX87DoubleExtended)
3914 return initFromF80LongDoubleAPInt(api);
3915 if (Sem == &APFloatBase::semIEEEquad)
3916 return initFromQuadrupleAPInt(api);
3917 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
3918 return initFromPPCDoubleDoubleLegacyAPInt(api);
3919 if (Sem == &APFloatBase::semFloat8E5M2)
3920 return initFromFloat8E5M2APInt(api);
3921 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
3922 return initFromFloat8E5M2FNUZAPInt(api);
3923 if (Sem == &APFloatBase::semFloat8E4M3)
3924 return initFromFloat8E4M3APInt(api);
3925 if (Sem == &APFloatBase::semFloat8E4M3FN)
3926 return initFromFloat8E4M3FNAPInt(api);
3927 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
3928 return initFromFloat8E4M3FNUZAPInt(api);
3929 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
3930 return initFromFloat8E4M3B11FNUZAPInt(api);
3931 if (Sem == &APFloatBase::semFloat8E3M4)
3932 return initFromFloat8E3M4APInt(api);
3933 if (Sem == &APFloatBase::semFloatTF32)
3934 return initFromFloatTF32APInt(api);
3935 if (Sem == &APFloatBase::semFloat8E8M0FNU)
3936 return initFromFloat8E8M0FNUAPInt(api);
3937 if (Sem == &APFloatBase::semFloat6E3M2FN)
3938 return initFromFloat6E3M2FNAPInt(api);
3939 if (Sem == &APFloatBase::semFloat6E2M3FN)
3940 return initFromFloat6E2M3FNAPInt(api);
3941 if (Sem == &APFloatBase::semFloat4E2M1FN)
3942 return initFromFloat4E2M1FNAPInt(api);
3943
3944 llvm_unreachable("unsupported semantics");
3945}
3946
3947/// Make this number the largest magnitude normal number in the given
3948/// semantics.
3949void IEEEFloat::makeLargest(bool Negative) {
3950 if (Negative && !semantics->hasSignedRepr)
3951 llvm_unreachable(
3952 "This floating point format does not support signed values");
3953 // We want (in interchange format):
3954 // sign = {Negative}
3955 // exponent = 1..10
3956 // significand = 1..1
3957 category = fcNormal;
3958 sign = Negative;
3959 exponent = semantics->maxExponent;
3960
3961 // Use memset to set all but the highest integerPart to all ones.
3962 integerPart *significand = significandParts();
3963 unsigned PartCount = partCount();
3964 memset(s: significand, c: 0xFF, n: sizeof(integerPart)*(PartCount - 1));
3965
3966 // Set the high integerPart especially setting all unused top bits for
3967 // internal consistency.
3968 const unsigned NumUnusedHighBits =
3969 PartCount*integerPartWidth - semantics->precision;
3970 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3971 ? (~integerPart(0) >> NumUnusedHighBits)
3972 : 0;
3973 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3974 semantics->nanEncoding == fltNanEncoding::AllOnes &&
3975 (semantics->precision > 1))
3976 significand[0] &= ~integerPart(1);
3977}
3978
3979/// Make this number the smallest magnitude denormal number in the given
3980/// semantics.
3981void IEEEFloat::makeSmallest(bool Negative) {
3982 if (Negative && !semantics->hasSignedRepr)
3983 llvm_unreachable(
3984 "This floating point format does not support signed values");
3985 // We want (in interchange format):
3986 // sign = {Negative}
3987 // exponent = 0..0
3988 // significand = 0..01
3989 category = fcNormal;
3990 sign = Negative;
3991 exponent = semantics->minExponent;
3992 APInt::tcSet(significandParts(), 1, partCount());
3993}
3994
3995void IEEEFloat::makeSmallestNormalized(bool Negative) {
3996 if (Negative && !semantics->hasSignedRepr)
3997 llvm_unreachable(
3998 "This floating point format does not support signed values");
3999 // We want (in interchange format):
4000 // sign = {Negative}
4001 // exponent = 0..0
4002 // significand = 10..0
4003
4004 category = fcNormal;
4005 zeroSignificand();
4006 sign = Negative;
4007 exponent = semantics->minExponent;
4008 APInt::tcSetBit(significandParts(), bit: semantics->precision - 1);
4009}
4010
4011IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4012 initFromAPInt(Sem: &Sem, api: API);
4013}
4014
4015IEEEFloat::IEEEFloat(float f) {
4016 initFromAPInt(Sem: &APFloatBase::semIEEEsingle, api: APInt::floatToBits(V: f));
4017}
4018
4019IEEEFloat::IEEEFloat(double d) {
4020 initFromAPInt(Sem: &APFloatBase::semIEEEdouble, api: APInt::doubleToBits(V: d));
4021}
4022
4023namespace {
4024 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4025 Buffer.append(in_start: Str.begin(), in_end: Str.end());
4026 }
4027
4028 /// Removes data from the given significand until it is no more
4029 /// precise than is required for the desired precision.
4030 void AdjustToPrecision(APInt &significand,
4031 int &exp, unsigned FormatPrecision) {
4032 unsigned bits = significand.getActiveBits();
4033
4034 // 196/59 is a very slight overestimate of lg_2(10).
4035 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4036
4037 if (bits <= bitsRequired) return;
4038
4039 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4040 if (!tensRemovable) return;
4041
4042 exp += tensRemovable;
4043
4044 APInt divisor(significand.getBitWidth(), 1);
4045 APInt powten(significand.getBitWidth(), 10);
4046 while (true) {
4047 if (tensRemovable & 1)
4048 divisor *= powten;
4049 tensRemovable >>= 1;
4050 if (!tensRemovable) break;
4051 powten *= powten;
4052 }
4053
4054 significand = significand.udiv(RHS: divisor);
4055
4056 // Truncate the significand down to its active bit count.
4057 significand = significand.trunc(width: significand.getActiveBits());
4058 }
4059
4060
4061 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4062 int &exp, unsigned FormatPrecision) {
4063 unsigned N = buffer.size();
4064 if (N <= FormatPrecision) return;
4065
4066 // The most significant figures are the last ones in the buffer.
4067 unsigned FirstSignificant = N - FormatPrecision;
4068
4069 // Round.
4070 // FIXME: this probably shouldn't use 'round half up'.
4071
4072 // Rounding down is just a truncation, except we also want to drop
4073 // trailing zeros from the new result.
4074 if (buffer[FirstSignificant - 1] < '5') {
4075 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4076 FirstSignificant++;
4077
4078 exp += FirstSignificant;
4079 buffer.erase(CS: &buffer[0], CE: &buffer[FirstSignificant]);
4080 return;
4081 }
4082
4083 // Rounding up requires a decimal add-with-carry. If we continue
4084 // the carry, the newly-introduced zeros will just be truncated.
4085 for (unsigned I = FirstSignificant; I != N; ++I) {
4086 if (buffer[I] == '9') {
4087 FirstSignificant++;
4088 } else {
4089 buffer[I]++;
4090 break;
4091 }
4092 }
4093
4094 // If we carried through, we have exactly one digit of precision.
4095 if (FirstSignificant == N) {
4096 exp += FirstSignificant;
4097 buffer.clear();
4098 buffer.push_back(Elt: '1');
4099 return;
4100 }
4101
4102 exp += FirstSignificant;
4103 buffer.erase(CS: &buffer[0], CE: &buffer[FirstSignificant]);
4104 }
4105
4106 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4107 APInt significand, unsigned FormatPrecision,
4108 unsigned FormatMaxPadding, bool TruncateZero) {
4109 const int semanticsPrecision = significand.getBitWidth();
4110
4111 if (isNeg)
4112 Str.push_back(Elt: '-');
4113
4114 // Set FormatPrecision if zero. We want to do this before we
4115 // truncate trailing zeros, as those are part of the precision.
4116 if (!FormatPrecision) {
4117 // We use enough digits so the number can be round-tripped back to an
4118 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4119 // Accurately" by Steele and White.
4120 // FIXME: Using a formula based purely on the precision is conservative;
4121 // we can print fewer digits depending on the actual value being printed.
4122
4123 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4124 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4125 }
4126
4127 // Ignore trailing binary zeros.
4128 int trailingZeros = significand.countr_zero();
4129 exp += trailingZeros;
4130 significand.lshrInPlace(ShiftAmt: trailingZeros);
4131
4132 // Change the exponent from 2^e to 10^e.
4133 if (exp == 0) {
4134 // Nothing to do.
4135 } else if (exp > 0) {
4136 // Just shift left.
4137 significand = significand.zext(width: semanticsPrecision + exp);
4138 significand <<= exp;
4139 exp = 0;
4140 } else { /* exp < 0 */
4141 int texp = -exp;
4142
4143 // We transform this using the identity:
4144 // (N)(2^-e) == (N)(5^e)(10^-e)
4145 // This means we have to multiply N (the significand) by 5^e.
4146 // To avoid overflow, we have to operate on numbers large
4147 // enough to store N * 5^e:
4148 // log2(N * 5^e) == log2(N) + e * log2(5)
4149 // <= semantics->precision + e * 137 / 59
4150 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4151
4152 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4153
4154 // Multiply significand by 5^e.
4155 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4156 significand = significand.zext(width: precision);
4157 APInt five_to_the_i(precision, 5);
4158 while (true) {
4159 if (texp & 1)
4160 significand *= five_to_the_i;
4161
4162 texp >>= 1;
4163 if (!texp)
4164 break;
4165 five_to_the_i *= five_to_the_i;
4166 }
4167 }
4168
4169 AdjustToPrecision(significand, exp, FormatPrecision);
4170
4171 SmallVector<char, 256> buffer;
4172
4173 // Fill the buffer.
4174 unsigned precision = significand.getBitWidth();
4175 if (precision < 4) {
4176 // We need enough precision to store the value 10.
4177 precision = 4;
4178 significand = significand.zext(width: precision);
4179 }
4180 APInt ten(precision, 10);
4181 APInt digit(precision, 0);
4182
4183 bool inTrail = true;
4184 while (significand != 0) {
4185 // digit <- significand % 10
4186 // significand <- significand / 10
4187 APInt::udivrem(LHS: significand, RHS: ten, Quotient&: significand, Remainder&: digit);
4188
4189 unsigned d = digit.getZExtValue();
4190
4191 // Drop trailing zeros.
4192 if (inTrail && !d)
4193 exp++;
4194 else {
4195 buffer.push_back(Elt: (char) ('0' + d));
4196 inTrail = false;
4197 }
4198 }
4199
4200 assert(!buffer.empty() && "no characters in buffer!");
4201
4202 // Drop down to FormatPrecision.
4203 // TODO: don't do more precise calculations above than are required.
4204 AdjustToPrecision(buffer, exp, FormatPrecision);
4205
4206 unsigned NDigits = buffer.size();
4207
4208 // Check whether we should use scientific notation.
4209 bool FormatScientific;
4210 if (!FormatMaxPadding) {
4211 FormatScientific = true;
4212 } else {
4213 if (exp >= 0) {
4214 // 765e3 --> 765000
4215 // ^^^
4216 // But we shouldn't make the number look more precise than it is.
4217 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4218 NDigits + (unsigned) exp > FormatPrecision);
4219 } else {
4220 // Power of the most significant digit.
4221 int MSD = exp + (int) (NDigits - 1);
4222 if (MSD >= 0) {
4223 // 765e-2 == 7.65
4224 FormatScientific = false;
4225 } else {
4226 // 765e-5 == 0.00765
4227 // ^ ^^
4228 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4229 }
4230 }
4231 }
4232
4233 // Scientific formatting is pretty straightforward.
4234 if (FormatScientific) {
4235 exp += (NDigits - 1);
4236
4237 Str.push_back(Elt: buffer[NDigits-1]);
4238 Str.push_back(Elt: '.');
4239 if (NDigits == 1 && TruncateZero)
4240 Str.push_back(Elt: '0');
4241 else
4242 for (unsigned I = 1; I != NDigits; ++I)
4243 Str.push_back(Elt: buffer[NDigits-1-I]);
4244 // Fill with zeros up to FormatPrecision.
4245 if (!TruncateZero && FormatPrecision > NDigits - 1)
4246 Str.append(NumInputs: FormatPrecision - NDigits + 1, Elt: '0');
4247 // For !TruncateZero we use lower 'e'.
4248 Str.push_back(Elt: TruncateZero ? 'E' : 'e');
4249
4250 Str.push_back(Elt: exp >= 0 ? '+' : '-');
4251 if (exp < 0)
4252 exp = -exp;
4253 SmallVector<char, 6> expbuf;
4254 do {
4255 expbuf.push_back(Elt: (char) ('0' + (exp % 10)));
4256 exp /= 10;
4257 } while (exp);
4258 // Exponent always at least two digits if we do not truncate zeros.
4259 if (!TruncateZero && expbuf.size() < 2)
4260 expbuf.push_back(Elt: '0');
4261 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4262 Str.push_back(Elt: expbuf[E-1-I]);
4263 return;
4264 }
4265
4266 // Non-scientific, positive exponents.
4267 if (exp >= 0) {
4268 for (unsigned I = 0; I != NDigits; ++I)
4269 Str.push_back(Elt: buffer[NDigits-1-I]);
4270 for (unsigned I = 0; I != (unsigned) exp; ++I)
4271 Str.push_back(Elt: '0');
4272 return;
4273 }
4274
4275 // Non-scientific, negative exponents.
4276
4277 // The number of digits to the left of the decimal point.
4278 int NWholeDigits = exp + (int) NDigits;
4279
4280 unsigned I = 0;
4281 if (NWholeDigits > 0) {
4282 for (; I != (unsigned) NWholeDigits; ++I)
4283 Str.push_back(Elt: buffer[NDigits-I-1]);
4284 Str.push_back(Elt: '.');
4285 } else {
4286 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4287
4288 Str.push_back(Elt: '0');
4289 Str.push_back(Elt: '.');
4290 for (unsigned Z = 1; Z != NZeros; ++Z)
4291 Str.push_back(Elt: '0');
4292 }
4293
4294 for (; I != NDigits; ++I)
4295 Str.push_back(Elt: buffer[NDigits-I-1]);
4296
4297 }
4298} // namespace
4299
4300void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4301 unsigned FormatMaxPadding, bool TruncateZero) const {
4302 switch (category) {
4303 case fcInfinity:
4304 if (isNegative())
4305 return append(Buffer&: Str, Str: "-Inf");
4306 else
4307 return append(Buffer&: Str, Str: "+Inf");
4308
4309 case fcNaN: return append(Buffer&: Str, Str: "NaN");
4310
4311 case fcZero:
4312 if (isNegative())
4313 Str.push_back(Elt: '-');
4314
4315 if (!FormatMaxPadding) {
4316 if (TruncateZero)
4317 append(Buffer&: Str, Str: "0.0E+0");
4318 else {
4319 append(Buffer&: Str, Str: "0.0");
4320 if (FormatPrecision > 1)
4321 Str.append(NumInputs: FormatPrecision - 1, Elt: '0');
4322 append(Buffer&: Str, Str: "e+00");
4323 }
4324 } else {
4325 Str.push_back(Elt: '0');
4326 }
4327 return;
4328
4329 case fcNormal:
4330 break;
4331 }
4332
4333 // Decompose the number into an APInt and an exponent.
4334 int exp = exponent - ((int) semantics->precision - 1);
4335 APInt significand(
4336 semantics->precision,
4337 ArrayRef(significandParts(), partCountForBits(bits: semantics->precision)));
4338
4339 toStringImpl(Str, isNeg: isNegative(), exp, significand, FormatPrecision,
4340 FormatMaxPadding, TruncateZero);
4341
4342}
4343
4344int IEEEFloat::getExactLog2Abs() const {
4345 if (!isFinite() || isZero())
4346 return INT_MIN;
4347
4348 const integerPart *Parts = significandParts();
4349 const int PartCount = partCountForBits(bits: semantics->precision);
4350
4351 int PopCount = 0;
4352 for (int i = 0; i < PartCount; ++i) {
4353 PopCount += llvm::popcount(Value: Parts[i]);
4354 if (PopCount > 1)
4355 return INT_MIN;
4356 }
4357
4358 if (exponent != semantics->minExponent)
4359 return exponent;
4360
4361 int CountrParts = 0;
4362 for (int i = 0; i < PartCount;
4363 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4364 if (Parts[i] != 0) {
4365 return exponent - semantics->precision + CountrParts +
4366 llvm::countr_zero(Val: Parts[i]) + 1;
4367 }
4368 }
4369
4370 llvm_unreachable("didn't find the set bit");
4371}
4372
4373bool IEEEFloat::isSignaling() const {
4374 if (!isNaN())
4375 return false;
4376 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4377 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4378 return false;
4379
4380 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4381 // first bit of the trailing significand being 0.
4382 return !APInt::tcExtractBit(significandParts(), bit: semantics->precision - 2);
4383}
4384
4385/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4386///
4387/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4388/// appropriate sign switching before/after the computation.
4389APFloat::opStatus IEEEFloat::next(bool nextDown) {
4390 // If we are performing nextDown, swap sign so we have -x.
4391 if (nextDown)
4392 changeSign();
4393
4394 // Compute nextUp(x)
4395 opStatus result = opOK;
4396
4397 // Handle each float category separately.
4398 switch (category) {
4399 case fcInfinity:
4400 // nextUp(+inf) = +inf
4401 if (!isNegative())
4402 break;
4403 // nextUp(-inf) = -getLargest()
4404 makeLargest(Negative: true);
4405 break;
4406 case fcNaN:
4407 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4408 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4409 // change the payload.
4410 if (isSignaling()) {
4411 result = opInvalidOp;
4412 // For consistency, propagate the sign of the sNaN to the qNaN.
4413 makeNaN(SNaN: false, Negative: isNegative(), fill: nullptr);
4414 }
4415 break;
4416 case fcZero:
4417 // nextUp(pm 0) = +getSmallest()
4418 makeSmallest(Negative: false);
4419 break;
4420 case fcNormal:
4421 // nextUp(-getSmallest()) = -0
4422 if (isSmallest() && isNegative()) {
4423 APInt::tcSet(significandParts(), 0, partCount());
4424 category = fcZero;
4425 exponent = 0;
4426 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4427 sign = false;
4428 if (!semantics->hasZero)
4429 makeSmallestNormalized(Negative: false);
4430 break;
4431 }
4432
4433 if (isLargest() && !isNegative()) {
4434 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4435 // nextUp(getLargest()) == NAN
4436 makeNaN();
4437 break;
4438 } else if (semantics->nonFiniteBehavior ==
4439 fltNonfiniteBehavior::FiniteOnly) {
4440 // nextUp(getLargest()) == getLargest()
4441 break;
4442 } else {
4443 // nextUp(getLargest()) == INFINITY
4444 APInt::tcSet(significandParts(), 0, partCount());
4445 category = fcInfinity;
4446 exponent = semantics->maxExponent + 1;
4447 break;
4448 }
4449 }
4450
4451 // nextUp(normal) == normal + inc.
4452 if (isNegative()) {
4453 // If we are negative, we need to decrement the significand.
4454
4455 // We only cross a binade boundary that requires adjusting the exponent
4456 // if:
4457 // 1. exponent != semantics->minExponent. This implies we are not in the
4458 // smallest binade or are dealing with denormals.
4459 // 2. Our significand excluding the integral bit is all zeros.
4460 bool WillCrossBinadeBoundary =
4461 exponent != semantics->minExponent && isSignificandAllZeros();
4462
4463 // Decrement the significand.
4464 //
4465 // We always do this since:
4466 // 1. If we are dealing with a non-binade decrement, by definition we
4467 // just decrement the significand.
4468 // 2. If we are dealing with a normal -> normal binade decrement, since
4469 // we have an explicit integral bit the fact that all bits but the
4470 // integral bit are zero implies that subtracting one will yield a
4471 // significand with 0 integral bit and 1 in all other spots. Thus we
4472 // must just adjust the exponent and set the integral bit to 1.
4473 // 3. If we are dealing with a normal -> denormal binade decrement,
4474 // since we set the integral bit to 0 when we represent denormals, we
4475 // just decrement the significand.
4476 integerPart *Parts = significandParts();
4477 APInt::tcDecrement(dst: Parts, parts: partCount());
4478
4479 if (WillCrossBinadeBoundary) {
4480 // Our result is a normal number. Do the following:
4481 // 1. Set the integral bit to 1.
4482 // 2. Decrement the exponent.
4483 APInt::tcSetBit(Parts, bit: semantics->precision - 1);
4484 exponent--;
4485 }
4486 } else {
4487 // If we are positive, we need to increment the significand.
4488
4489 // We only cross a binade boundary that requires adjusting the exponent if
4490 // the input is not a denormal and all of said input's significand bits
4491 // are set. If all of said conditions are true: clear the significand, set
4492 // the integral bit to 1, and increment the exponent. If we have a
4493 // denormal always increment since moving denormals and the numbers in the
4494 // smallest normal binade have the same exponent in our representation.
4495 // If there are only exponents, any increment always crosses the
4496 // BinadeBoundary.
4497 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(Sem: *semantics) ||
4498 (!isDenormal() && isSignificandAllOnes());
4499
4500 if (WillCrossBinadeBoundary) {
4501 integerPart *Parts = significandParts();
4502 APInt::tcSet(Parts, 0, partCount());
4503 APInt::tcSetBit(Parts, bit: semantics->precision - 1);
4504 assert(exponent != semantics->maxExponent &&
4505 "We can not increment an exponent beyond the maxExponent allowed"
4506 " by the given floating point semantics.");
4507 exponent++;
4508 } else {
4509 incrementSignificand();
4510 }
4511 }
4512 break;
4513 }
4514
4515 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4516 if (nextDown)
4517 changeSign();
4518
4519 return result;
4520}
4521
4522APInt IEEEFloat::getNaNPayload() const {
4523 assert(isNaN() && "Can only be called on NaN values");
4524 // Number of bits in the payload, excluding the (maybe implied) integer bit.
4525 unsigned Bits = semantics->precision - 1;
4526 return APInt(Bits, ArrayRef(significandParts(), partCountForBits(bits: Bits)));
4527}
4528
4529APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4530 return ::exponentNaN(semantics: *semantics);
4531}
4532
4533APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4534 return ::exponentInf(semantics: *semantics);
4535}
4536
4537APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4538 return ::exponentZero(semantics: *semantics);
4539}
4540
4541void IEEEFloat::makeInf(bool Negative) {
4542 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4543 llvm_unreachable("This floating point format does not support Inf");
4544
4545 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4546 // There is no Inf, so make NaN instead.
4547 makeNaN(SNaN: false, Negative);
4548 return;
4549 }
4550 category = fcInfinity;
4551 sign = Negative;
4552 exponent = exponentInf();
4553 APInt::tcSet(significandParts(), 0, partCount());
4554}
4555
4556void IEEEFloat::makeZero(bool Negative) {
4557 if (!semantics->hasZero)
4558 llvm_unreachable("This floating point format does not support Zero");
4559
4560 category = fcZero;
4561 sign = Negative;
4562 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4563 // Merge negative zero to positive because 0b10000...000 is used for NaN
4564 sign = false;
4565 }
4566 exponent = exponentZero();
4567 APInt::tcSet(significandParts(), 0, partCount());
4568}
4569
4570void IEEEFloat::makeQuiet() {
4571 assert(isNaN());
4572 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4573 APInt::tcSetBit(significandParts(), bit: semantics->precision - 2);
4574}
4575
4576int ilogb(const IEEEFloat &Arg) {
4577 if (Arg.isNaN())
4578 return APFloat::IEK_NaN;
4579 if (Arg.isZero())
4580 return APFloat::IEK_Zero;
4581 if (Arg.isInfinity())
4582 return APFloat::IEK_Inf;
4583 if (!Arg.isDenormal())
4584 return Arg.exponent;
4585
4586 IEEEFloat Normalized(Arg);
4587 int SignificandBits = Arg.getSemantics().precision - 1;
4588
4589 Normalized.exponent += SignificandBits;
4590 Normalized.normalize(rounding_mode: APFloat::rmNearestTiesToEven, lost_fraction: lfExactlyZero);
4591 return Normalized.exponent - SignificandBits;
4592}
4593
4594IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode RoundingMode) {
4595 auto MaxExp = X.getSemantics().maxExponent;
4596 auto MinExp = X.getSemantics().minExponent;
4597
4598 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4599 // overflow; clamp it to a safe range before adding, but ensure that the range
4600 // is large enough that the clamp does not change the result. The range we
4601 // need to support is the difference between the largest possible exponent and
4602 // the normalized exponent of half the smallest denormal.
4603
4604 int SignificandBits = X.getSemantics().precision - 1;
4605 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4606
4607 // Clamp to one past the range ends to let normalize handle overlflow.
4608 X.exponent += std::clamp(val: Exp, lo: -MaxIncrement - 1, hi: MaxIncrement);
4609 X.normalize(rounding_mode: RoundingMode, lost_fraction: lfExactlyZero);
4610 if (X.isNaN())
4611 X.makeQuiet();
4612 return X;
4613}
4614
4615IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4616 Exp = ilogb(Arg: Val);
4617
4618 // Quiet signalling nans.
4619 if (Exp == APFloat::IEK_NaN) {
4620 IEEEFloat Quiet(Val);
4621 Quiet.makeQuiet();
4622 return Quiet;
4623 }
4624
4625 if (Exp == APFloat::IEK_Inf)
4626 return Val;
4627
4628 // 1 is added because frexp is defined to return a normalized fraction in
4629 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4630 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4631 return scalbn(X: Val, Exp: -Exp, RoundingMode: RM);
4632}
4633
4634DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4635 : Semantics(&S),
4636 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4637 APFloat(APFloatBase::semIEEEdouble)}) {
4638 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4639}
4640
4641DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4642 : Semantics(&S), Floats(new APFloat[2]{
4643 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4644 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4645 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4646}
4647
4648DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4649 : Semantics(&S),
4650 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4651 APFloat(APFloatBase::semIEEEdouble)}) {
4652 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4653}
4654
4655DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4656 : Semantics(&S),
4657 Floats(new APFloat[2]{
4658 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4659 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4660 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4661}
4662
4663DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4664 APFloat &&Second)
4665 : Semantics(&S),
4666 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4667 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4668 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4669 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4670}
4671
4672DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4673 : Semantics(RHS.Semantics),
4674 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4675 APFloat(RHS.Floats[1])}
4676 : nullptr) {
4677 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4678}
4679
4680DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4681 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4682 RHS.Semantics = &APFloatBase::semBogus;
4683 RHS.Floats = nullptr;
4684 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4685}
4686
4687DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4688 if (Semantics == RHS.Semantics && RHS.Floats) {
4689 Floats[0] = RHS.Floats[0];
4690 Floats[1] = RHS.Floats[1];
4691 } else if (this != &RHS) {
4692 this->~DoubleAPFloat();
4693 new (this) DoubleAPFloat(RHS);
4694 }
4695 return *this;
4696}
4697
4698// Returns a result such that:
4699// 1. abs(Lo) <= ulp(Hi)/2
4700// 2. Hi == RTNE(Hi + Lo)
4701// 3. Hi + Lo == X + Y
4702//
4703// Requires that log2(X) >= log2(Y).
4704static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4705 if (!X.isFinite())
4706 return {X, APFloat::getZero(Sem: X.getSemantics(), /*Negative=*/false)};
4707 APFloat Hi = X + Y;
4708 APFloat Delta = Hi - X;
4709 APFloat Lo = Y - Delta;
4710 return {Hi, Lo};
4711}
4712
4713// Implement addition, subtraction, multiplication and division based on:
4714// "Software for Doubled-Precision Floating-Point Computations",
4715// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4716APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4717 const APFloat &c, const APFloat &cc,
4718 roundingMode RM) {
4719 int Status = opOK;
4720 APFloat z = a;
4721 Status |= z.add(RHS: c, RM);
4722 if (!z.isFinite()) {
4723 if (!z.isInfinity()) {
4724 Floats[0] = std::move(z);
4725 Floats[1].makeZero(/* Neg = */ false);
4726 return (opStatus)Status;
4727 }
4728 Status = opOK;
4729 auto AComparedToC = a.compareAbsoluteValue(RHS: c);
4730 z = cc;
4731 Status |= z.add(RHS: aa, RM);
4732 if (AComparedToC == APFloat::cmpGreaterThan) {
4733 // z = cc + aa + c + a;
4734 Status |= z.add(RHS: c, RM);
4735 Status |= z.add(RHS: a, RM);
4736 } else {
4737 // z = cc + aa + a + c;
4738 Status |= z.add(RHS: a, RM);
4739 Status |= z.add(RHS: c, RM);
4740 }
4741 if (!z.isFinite()) {
4742 Floats[0] = std::move(z);
4743 Floats[1].makeZero(/* Neg = */ false);
4744 return (opStatus)Status;
4745 }
4746 Floats[0] = z;
4747 APFloat zz = aa;
4748 Status |= zz.add(RHS: cc, RM);
4749 if (AComparedToC == APFloat::cmpGreaterThan) {
4750 // Floats[1] = a - z + c + zz;
4751 Floats[1] = a;
4752 Status |= Floats[1].subtract(RHS: z, RM);
4753 Status |= Floats[1].add(RHS: c, RM);
4754 Status |= Floats[1].add(RHS: zz, RM);
4755 } else {
4756 // Floats[1] = c - z + a + zz;
4757 Floats[1] = c;
4758 Status |= Floats[1].subtract(RHS: z, RM);
4759 Status |= Floats[1].add(RHS: a, RM);
4760 Status |= Floats[1].add(RHS: zz, RM);
4761 }
4762 } else {
4763 // q = a - z;
4764 APFloat q = a;
4765 Status |= q.subtract(RHS: z, RM);
4766
4767 // zz = q + c + (a - (q + z)) + aa + cc;
4768 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4769 auto zz = q;
4770 Status |= zz.add(RHS: c, RM);
4771 Status |= q.add(RHS: z, RM);
4772 Status |= q.subtract(RHS: a, RM);
4773 q.changeSign();
4774 Status |= zz.add(RHS: q, RM);
4775 Status |= zz.add(RHS: aa, RM);
4776 Status |= zz.add(RHS: cc, RM);
4777 if (zz.isZero() && !zz.isNegative()) {
4778 Floats[0] = std::move(z);
4779 Floats[1].makeZero(/* Neg = */ false);
4780 return opOK;
4781 }
4782 Floats[0] = z;
4783 Status |= Floats[0].add(RHS: zz, RM);
4784 if (!Floats[0].isFinite()) {
4785 Floats[1].makeZero(/* Neg = */ false);
4786 return (opStatus)Status;
4787 }
4788 Floats[1] = std::move(z);
4789 Status |= Floats[1].subtract(RHS: Floats[0], RM);
4790 Status |= Floats[1].add(RHS: zz, RM);
4791 }
4792 return (opStatus)Status;
4793}
4794
4795APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4796 const DoubleAPFloat &RHS,
4797 DoubleAPFloat &Out,
4798 roundingMode RM) {
4799 if (LHS.getCategory() == fcNaN) {
4800 Out = LHS;
4801 return opOK;
4802 }
4803 if (RHS.getCategory() == fcNaN) {
4804 Out = RHS;
4805 return opOK;
4806 }
4807 if (LHS.getCategory() == fcZero) {
4808 Out = RHS;
4809 return opOK;
4810 }
4811 if (RHS.getCategory() == fcZero) {
4812 Out = LHS;
4813 return opOK;
4814 }
4815 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4816 LHS.isNegative() != RHS.isNegative()) {
4817 Out.makeNaN(SNaN: false, Neg: Out.isNegative(), fill: nullptr);
4818 return opInvalidOp;
4819 }
4820 if (LHS.getCategory() == fcInfinity) {
4821 Out = LHS;
4822 return opOK;
4823 }
4824 if (RHS.getCategory() == fcInfinity) {
4825 Out = RHS;
4826 return opOK;
4827 }
4828 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4829
4830 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4831 CC(RHS.Floats[1]);
4832 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4833 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4834 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4835 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4836 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4837 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4838 return Out.addImpl(a: A, aa: AA, c: C, cc: CC, RM);
4839}
4840
4841APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4842 roundingMode RM) {
4843 return addWithSpecial(LHS: *this, RHS, Out&: *this, RM);
4844}
4845
4846APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4847 roundingMode RM) {
4848 changeSign();
4849 auto Ret = add(RHS, RM);
4850 changeSign();
4851 return Ret;
4852}
4853
4854APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4855 APFloat::roundingMode RM) {
4856 const auto &LHS = *this;
4857 auto &Out = *this;
4858 /* Interesting observation: For special categories, finding the lowest
4859 common ancestor of the following layered graph gives the correct
4860 return category:
4861
4862 NaN
4863 / \
4864 Zero Inf
4865 \ /
4866 Normal
4867
4868 e.g. NaN * NaN = NaN
4869 Zero * Inf = NaN
4870 Normal * Zero = Zero
4871 Normal * Inf = Inf
4872 */
4873 if (LHS.getCategory() == fcNaN) {
4874 Out = LHS;
4875 return opOK;
4876 }
4877 if (RHS.getCategory() == fcNaN) {
4878 Out = RHS;
4879 return opOK;
4880 }
4881 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4882 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4883 Out.makeNaN(SNaN: false, Neg: false, fill: nullptr);
4884 return opOK;
4885 }
4886 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4887 Out = LHS;
4888 return opOK;
4889 }
4890 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4891 Out = RHS;
4892 return opOK;
4893 }
4894 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4895 "Special cases not handled exhaustively");
4896
4897 int Status = opOK;
4898 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4899 // t = a * c
4900 APFloat T = A;
4901 Status |= T.multiply(RHS: C, RM);
4902 if (!T.isFiniteNonZero()) {
4903 Floats[0] = std::move(T);
4904 Floats[1].makeZero(/* Neg = */ false);
4905 return (opStatus)Status;
4906 }
4907
4908 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4909 APFloat Tau = A;
4910 T.changeSign();
4911 Status |= Tau.fusedMultiplyAdd(Multiplicand: C, Addend: T, RM);
4912 T.changeSign();
4913 {
4914 // v = a * d
4915 APFloat V = A;
4916 Status |= V.multiply(RHS: D, RM);
4917 // w = b * c
4918 APFloat W = B;
4919 Status |= W.multiply(RHS: C, RM);
4920 Status |= V.add(RHS: W, RM);
4921 // tau += v + w
4922 Status |= Tau.add(RHS: V, RM);
4923 }
4924 // u = t + tau
4925 APFloat U = T;
4926 Status |= U.add(RHS: Tau, RM);
4927
4928 Floats[0] = U;
4929 if (!U.isFinite()) {
4930 Floats[1].makeZero(/* Neg = */ false);
4931 } else {
4932 // Floats[1] = (t - u) + tau
4933 Status |= T.subtract(RHS: U, RM);
4934 Status |= T.add(RHS: Tau, RM);
4935 Floats[1] = std::move(T);
4936 }
4937 return (opStatus)Status;
4938}
4939
4940APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4941 APFloat::roundingMode RM) {
4942 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4943 "Unexpected Semantics");
4944 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4945 auto Ret = Tmp.divide(
4946 RHS: APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4947 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4948 return Ret;
4949}
4950
4951APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4952 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4953 "Unexpected Semantics");
4954 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4955 auto Ret = Tmp.remainder(
4956 RHS: APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4957 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4958 return Ret;
4959}
4960
4961APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4962 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4963 "Unexpected Semantics");
4964 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4965 auto Ret = Tmp.mod(
4966 RHS: APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4967 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4968 return Ret;
4969}
4970
4971APFloat::opStatus
4972DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4973 const DoubleAPFloat &Addend,
4974 APFloat::roundingMode RM) {
4975 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4976 "Unexpected Semantics");
4977 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4978 auto Ret = Tmp.fusedMultiplyAdd(
4979 Multiplicand: APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
4980 Multiplicand.bitcastToAPInt()),
4981 Addend: APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
4982 RM);
4983 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4984 return Ret;
4985}
4986
4987APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4988 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4989 "Unexpected Semantics");
4990 const APFloat &Hi = getFirst();
4991 const APFloat &Lo = getSecond();
4992
4993 APFloat RoundedHi = Hi;
4994 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
4995
4996 // We can reduce the problem to just the high part if the input:
4997 // 1. Represents a non-finite value.
4998 // 2. Has a component which is zero.
4999 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5000 Floats[0] = std::move(RoundedHi);
5001 Floats[1].makeZero(/*Neg=*/false);
5002 return HiStatus;
5003 }
5004
5005 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5006 // halfway point.
5007 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5008 APFloat TieBreaker) {
5009 // RoundingError tells us which direction we rounded:
5010 // - RoundingError > 0: we rounded up.
5011 // - RoundingError < 0: we rounded down.
5012 // Sterbenz' lemma ensures that RoundingError is exact.
5013 const APFloat RoundingError = Rounded - ToRound;
5014 if (TieBreaker.isNonZero() &&
5015 TieBreaker.isNegative() != RoundingError.isNegative() &&
5016 abs(X: RoundingError).isExactlyValue(V: 0.5))
5017 Rounded.add(
5018 RHS: APFloat::getOne(Sem: Rounded.getSemantics(), Negative: TieBreaker.isNegative()),
5019 RM: rmNearestTiesToEven);
5020 return Rounded;
5021 };
5022
5023 // Case 1: Hi is not an integer.
5024 // Special cases are for rounding modes that are sensitive to ties.
5025 if (RoundedHi != Hi) {
5026 // We need to consider the case where Hi was between two integers and the
5027 // rounding mode broke the tie when, in fact, Lo may have had a different
5028 // sign than Hi.
5029 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5030 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5031
5032 Floats[0] = std::move(RoundedHi);
5033 Floats[1].makeZero(/*Neg=*/false);
5034 return HiStatus;
5035 }
5036
5037 // Case 2: Hi is an integer.
5038 // Special cases are for rounding modes which are rounding towards or away from zero.
5039 RoundingMode LoRoundingMode;
5040 if (RM == rmTowardZero)
5041 // When our input is positive, we want the Lo component rounded toward
5042 // negative infinity to get the smallest result magnitude. Likewise,
5043 // negative inputs want the Lo component rounded toward positive infinity.
5044 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5045 else
5046 LoRoundingMode = RM;
5047
5048 APFloat RoundedLo = Lo;
5049 const opStatus LoStatus = RoundedLo.roundToIntegral(RM: LoRoundingMode);
5050 if (LoRoundingMode == rmNearestTiesToAway)
5051 // We need to consider the case where Lo was between two integers and the
5052 // rounding mode broke the tie when, in fact, Hi may have had a different
5053 // sign than Lo.
5054 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5055
5056 // We must ensure that the final result has no overlap between the two APFloat values.
5057 std::tie(args&: RoundedHi, args&: RoundedLo) = fastTwoSum(X: RoundedHi, Y: RoundedLo);
5058
5059 Floats[0] = std::move(RoundedHi);
5060 Floats[1] = std::move(RoundedLo);
5061 return LoStatus;
5062}
5063
5064void DoubleAPFloat::changeSign() {
5065 Floats[0].changeSign();
5066 Floats[1].changeSign();
5067}
5068
5069APFloat::cmpResult
5070DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
5071 // Compare absolute values of the high parts.
5072 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS: RHS.Floats[0]);
5073 if (HiPartCmp != cmpEqual)
5074 return HiPartCmp;
5075
5076 // Zero, regardless of sign, is equal.
5077 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5078 return cmpEqual;
5079
5080 // At this point, |this->Hi| == |RHS.Hi|.
5081 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5082 // same, and Hi-|Lo| if signs are different.
5083 const bool ThisIsSubtractive =
5084 Floats[0].isNegative() != Floats[1].isNegative();
5085 const bool RHSIsSubtractive =
5086 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5087
5088 // Case 1: The low part of 'this' is zero.
5089 if (Floats[1].isZero())
5090 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5091 // If RHS is subtractive, its magnitude is smaller.
5092 // If RHS is additive, its magnitude is larger.
5093 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5094
5095 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5096 if (RHS.Floats[1].isZero())
5097 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5098 // If 'this' is subtractive, its magnitude is smaller.
5099 // If 'this' is additive, its magnitude is larger.
5100 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5101
5102 // If their natures differ, the additive one is larger.
5103 if (ThisIsSubtractive != RHSIsSubtractive)
5104 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5105
5106 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5107 // The comparison now depends on the magnitude of the low parts.
5108 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS: RHS.Floats[1]);
5109
5110 if (ThisIsSubtractive) {
5111 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5112 if (LoPartCmp == cmpLessThan)
5113 return cmpGreaterThan;
5114 if (LoPartCmp == cmpGreaterThan)
5115 return cmpLessThan;
5116 }
5117
5118 // If additive, the comparison of |Lo| is direct.
5119 // If equal, they are equal.
5120 return LoPartCmp;
5121}
5122
5123APFloat::fltCategory DoubleAPFloat::getCategory() const {
5124 return Floats[0].getCategory();
5125}
5126
5127bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5128
5129void DoubleAPFloat::makeInf(bool Neg) {
5130 Floats[0].makeInf(Neg);
5131 Floats[1].makeZero(/* Neg = */ false);
5132}
5133
5134void DoubleAPFloat::makeZero(bool Neg) {
5135 Floats[0].makeZero(Neg);
5136 Floats[1].makeZero(/* Neg = */ false);
5137}
5138
5139void DoubleAPFloat::makeLargest(bool Neg) {
5140 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5141 "Unexpected Semantics");
5142 Floats[0] =
5143 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5144 Floats[1] =
5145 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5146 if (Neg)
5147 changeSign();
5148}
5149
5150void DoubleAPFloat::makeSmallest(bool Neg) {
5151 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5152 "Unexpected Semantics");
5153 Floats[0].makeSmallest(Neg);
5154 Floats[1].makeZero(/* Neg = */ false);
5155}
5156
5157void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
5158 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5159 "Unexpected Semantics");
5160 Floats[0] =
5161 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5162 if (Neg)
5163 Floats[0].changeSign();
5164 Floats[1].makeZero(/* Neg = */ false);
5165}
5166
5167void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5168 Floats[0].makeNaN(SNaN, Neg, fill);
5169 Floats[1].makeZero(/* Neg = */ false);
5170}
5171
5172APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
5173 auto Result = Floats[0].compare(RHS: RHS.Floats[0]);
5174 // |Float[0]| > |Float[1]|
5175 if (Result == APFloat::cmpEqual)
5176 return Floats[1].compare(RHS: RHS.Floats[1]);
5177 return Result;
5178}
5179
5180bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
5181 return Floats[0].bitwiseIsEqual(RHS: RHS.Floats[0]) &&
5182 Floats[1].bitwiseIsEqual(RHS: RHS.Floats[1]);
5183}
5184
5185hash_code hash_value(const DoubleAPFloat &Arg) {
5186 if (Arg.Floats)
5187 return hash_combine(args: hash_value(Arg: Arg.Floats[0]), args: hash_value(Arg: Arg.Floats[1]));
5188 return hash_combine(args: Arg.Semantics);
5189}
5190
5191APInt DoubleAPFloat::bitcastToAPInt() const {
5192 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5193 "Unexpected Semantics");
5194 uint64_t Data[] = {
5195 Floats[0].bitcastToAPInt().getRawData()[0],
5196 Floats[1].bitcastToAPInt().getRawData()[0],
5197 };
5198 return APInt(128, Data);
5199}
5200
5201Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
5202 roundingMode RM) {
5203 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5204 "Unexpected Semantics");
5205 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5206 auto Ret = Tmp.convertFromString(S, RM);
5207 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5208 return Ret;
5209}
5210
5211// The double-double lattice of values corresponds to numbers which obey:
5212// - abs(lo) <= 1/2 * ulp(hi)
5213// - roundTiesToEven(hi + lo) == hi
5214//
5215// nextUp must choose the smallest output > input that follows these rules.
5216// nexDown must choose the largest output < input that follows these rules.
5217APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5218 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5219 "Unexpected Semantics");
5220 // nextDown(x) = -nextUp(-x)
5221 if (nextDown) {
5222 changeSign();
5223 APFloat::opStatus Result = next(/*nextDown=*/false);
5224 changeSign();
5225 return Result;
5226 }
5227 switch (getCategory()) {
5228 case fcInfinity:
5229 // nextUp(+inf) = +inf
5230 // nextUp(-inf) = -getLargest()
5231 if (isNegative())
5232 makeLargest(Neg: true);
5233 return opOK;
5234
5235 case fcNaN:
5236 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5237 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5238 // change the payload.
5239 if (getFirst().isSignaling()) {
5240 // For consistency, propagate the sign of the sNaN to the qNaN.
5241 makeNaN(SNaN: false, Neg: isNegative(), fill: nullptr);
5242 return opInvalidOp;
5243 }
5244 return opOK;
5245
5246 case fcZero:
5247 // nextUp(pm 0) = +getSmallest()
5248 makeSmallest(Neg: false);
5249 return opOK;
5250
5251 case fcNormal:
5252 break;
5253 }
5254
5255 const APFloat &HiOld = getFirst();
5256 const APFloat &LoOld = getSecond();
5257
5258 APFloat NextLo = LoOld;
5259 NextLo.next(/*nextDown=*/false);
5260
5261 // We want to admit values where:
5262 // 1. abs(Lo) <= ulp(Hi)/2
5263 // 2. Hi == RTNE(Hi + lo)
5264 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5265 return Hi + Lo == Hi;
5266 };
5267
5268 // Check if (HiOld, nextUp(LoOld) is in the lattice.
5269 if (InLattice(HiOld, NextLo)) {
5270 // Yes, the result is (HiOld, nextUp(LoOld)).
5271 Floats[1] = std::move(NextLo);
5272
5273 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5274 // value is defined to have exactly 106 bits of precision. This limitation
5275 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5276 // value.
5277 DoubleAPFloat Largest{*Semantics, uninitialized};
5278 Largest.makeLargest(/*Neg=*/false);
5279 if (compare(RHS: Largest) == cmpGreaterThan)
5280 makeInf(/*Neg=*/false);
5281
5282 return opOK;
5283 }
5284
5285 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5286 // correct result. We know the new hi component will be nextUp(HiOld) but our
5287 // lattice rules make it a little ambiguous what the correct NextLo must be.
5288 APFloat NextHi = HiOld;
5289 NextHi.next(/*nextDown=*/false);
5290
5291 // nextUp(getLargest()) == INFINITY
5292 if (NextHi.isInfinity()) {
5293 makeInf(/*Neg=*/false);
5294 return opOK;
5295 }
5296
5297 // IEEE 754-2019 5.3.1:
5298 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5299 // -0."
5300 if (NextHi.isZero()) {
5301 makeZero(/*Neg=*/true);
5302 return opOK;
5303 }
5304
5305 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5306 // negative infinity as possible.
5307 NextLo = neg(X: scalbn(X: harrisonUlp(X: NextHi), Exp: -1, RM: rmTowardZero));
5308 if (!InLattice(NextHi, NextLo))
5309 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5310 NextLo.next(/*nextDown=*/false);
5311
5312 Floats[0] = std::move(NextHi);
5313 Floats[1] = std::move(NextLo);
5314
5315 return opOK;
5316}
5317
5318APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5319 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5320 roundingMode RM, bool *IsExact) const {
5321 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5322 "Unexpected Semantics");
5323
5324 // If Hi is not finite, or Lo is zero, the value is entirely represented
5325 // by Hi. Delegate to the simpler single-APFloat conversion.
5326 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5327 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5328
5329 // First, round the full double-double value to an integral value. This
5330 // simplifies the rest of the function, as we no longer need to consider
5331 // fractional parts.
5332 *IsExact = false;
5333 DoubleAPFloat Integral = *this;
5334 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5335 if (RoundStatus == opInvalidOp)
5336 return opInvalidOp;
5337 const APFloat &IntegralHi = Integral.getFirst();
5338 const APFloat &IntegralLo = Integral.getSecond();
5339
5340 // If rounding results in either component being zero, the sum is trivial.
5341 // Delegate to the simpler single-APFloat conversion.
5342 bool HiIsExact;
5343 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5344 const opStatus HiStatus =
5345 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, IsExact: &HiIsExact);
5346 // The conversion from an integer-valued float to an APInt may fail if the
5347 // result would be out of range. Regardless, taking this path is only
5348 // possible if rounding occurred during the initial `roundToIntegral`.
5349 return HiStatus == opOK ? opInexact : HiStatus;
5350 }
5351
5352 // A negative number cannot be represented by an unsigned integer.
5353 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5354 if (!IsSigned && IntegralHi.isNegative())
5355 return opInvalidOp;
5356
5357 // Handle the special boundary case where |Hi| is exactly the power of two
5358 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5359 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5360 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5361 // signed, N for unsigned).
5362 bool LoIsExact;
5363 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5364 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5365 if (HiExactLog2 >= 0 &&
5366 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5367 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5368 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5369 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5370 return opInvalidOp;
5371
5372 // If the signs differ, the sum will fit. We can compute the result using
5373 // properties of two's complement arithmetic without a wide intermediate
5374 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5375 const opStatus LoStatus = IntegralLo.convertToInteger(
5376 Input, Width, /*IsSigned=*/true, RM, IsExact: &LoIsExact);
5377 if (LoStatus == opInvalidOp)
5378 return opInvalidOp;
5379
5380 // Adjust the bit pattern of Lo to account for Hi's value:
5381 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5382 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5383 // already produced the correct final bit pattern.
5384 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5385 // can be computed by taking the two's complement pattern for `Lo` and
5386 // clearing the sign bit.
5387 if (IsSigned && !IntegralHi.isNegative())
5388 APInt::tcClearBit(Input.data(), bit: PositiveOverflowWidth);
5389 *IsExact = RoundStatus == opOK;
5390 return RoundStatus;
5391 }
5392
5393 // Convert Hi into an integer. This may not fit but that is OK: we know that
5394 // Hi + Lo would not fit either in this situation.
5395 const opStatus HiStatus = IntegralHi.convertToInteger(
5396 Input, Width, IsSigned, RM: rmTowardZero, IsExact: &HiIsExact);
5397 if (HiStatus == opInvalidOp)
5398 return HiStatus;
5399
5400 // Convert Lo into a temporary integer of the same width.
5401 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5402 const opStatus LoStatus =
5403 IntegralLo.convertToInteger(Result&: LoResult, RM: rmTowardZero, IsExact: &LoIsExact);
5404 if (LoStatus == opInvalidOp)
5405 return LoStatus;
5406
5407 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5408 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5409 // where the sum could cross the integer type's boundary is when Hi is a
5410 // power of two, which is handled by the special case block above.
5411 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5412
5413 *IsExact = RoundStatus == opOK;
5414 return RoundStatus;
5415}
5416
5417APFloat::opStatus
5418DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5419 unsigned int Width, bool IsSigned,
5420 roundingMode RM, bool *IsExact) const {
5421 opStatus FS =
5422 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5423
5424 if (FS == opInvalidOp) {
5425 const unsigned DstPartsCount = partCountForBits(bits: Width);
5426 assert(DstPartsCount <= Input.size() && "Integer too big");
5427
5428 unsigned Bits;
5429 if (getCategory() == fcNaN)
5430 Bits = 0;
5431 else if (isNegative())
5432 Bits = IsSigned;
5433 else
5434 Bits = Width - IsSigned;
5435
5436 tcSetLeastSignificantBits(dst: Input.data(), parts: DstPartsCount, bits: Bits);
5437 if (isNegative() && IsSigned)
5438 APInt::tcShiftLeft(Input.data(), Words: DstPartsCount, Count: Width - 1);
5439 }
5440
5441 return FS;
5442}
5443
5444APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5445 switch (RM) {
5446 case APFloat::rmTowardZero:
5447 makeLargest(/*Neg=*/isNegative());
5448 break;
5449 case APFloat::rmTowardNegative:
5450 if (isNegative())
5451 makeInf(/*Neg=*/true);
5452 else
5453 makeLargest(/*Neg=*/false);
5454 break;
5455 case APFloat::rmTowardPositive:
5456 if (isNegative())
5457 makeLargest(/*Neg=*/true);
5458 else
5459 makeInf(/*Neg=*/false);
5460 break;
5461 case APFloat::rmNearestTiesToAway:
5462 case APFloat::rmNearestTiesToEven:
5463 makeInf(/*Neg=*/isNegative());
5464 break;
5465 default:
5466 llvm_unreachable("Invalid rounding mode found");
5467 }
5468 opStatus S = opInexact;
5469 if (!getFirst().isFinite())
5470 S = static_cast<opStatus>(S | opOverflow);
5471 return S;
5472}
5473
5474APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5475 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5476 // Find the most significant bit of the source integer. APInt::tcMSB returns
5477 // UINT_MAX for a zero value.
5478 const unsigned SrcMSB = APInt::tcMSB(parts: Src, n: SrcCount);
5479 if (SrcMSB == UINT_MAX) {
5480 // The source integer is 0.
5481 makeZero(/*Neg=*/false);
5482 return opOK;
5483 }
5484
5485 // Create a minimally-sized APInt to represent the source value.
5486 const unsigned SrcBitWidth = SrcMSB + 1;
5487 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5488 /*isUnsigned=*/true};
5489
5490 // Stage 1: Initial Approximation.
5491 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5492 // We use round-to-nearest because it minimizes the initial error, which is
5493 // crucial for the subsequent steps.
5494 APFloat Hi{getFirst().getSemantics()};
5495 Hi.convertFromAPInt(Input: SrcInt, /*IsSigned=*/false, RM: rmNearestTiesToEven);
5496
5497 // If the first approximation already overflows, the number is too large.
5498 // NOTE: The underlying semantics are *more* conservative when choosing to
5499 // overflow because their notion of ULP is much larger. As such, it is always
5500 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5501 if (!Hi.isFinite())
5502 return handleOverflow(RM);
5503
5504 // Stage 2: Exact Error Calculation.
5505 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5506 // This is done by converting Hi back to an integer and subtracting it from
5507 // the original source.
5508 bool HiAsIntIsExact;
5509 // Create an integer representation of Hi. Its width is determined by the
5510 // exponent of Hi, ensuring it's just large enough. This width can exceed
5511 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5512 // accurately when converted back to an integer.
5513 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Arg: Hi) + 1), /*isUnsigned=*/true};
5514 Hi.convertToInteger(Result&: HiAsInt, RM: rmNearestTiesToEven, IsExact: &HiAsIntIsExact);
5515 const APInt Error = SrcInt.zext(width: HiAsInt.getBitWidth()) - HiAsInt;
5516
5517 // Stage 3: Error Approximation and Rounding.
5518 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5519 // captures the remainder of the original number. The rounding mode for this
5520 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5521 // ensure the final sum (Hi + Lo) rounds correctly.
5522 roundingMode LoRM = RM;
5523 // Adjustments are only necessary when the initial approximation Hi was an
5524 // overestimate, making the Error negative.
5525 if (Error.isNegative()) {
5526 if (RM == rmNearestTiesToAway) {
5527 // For rmNearestTiesToAway, a tie should round away from zero. Since
5528 // SrcInt is positive, this means rounding toward +infinity.
5529 // A standard conversion of a negative Error would round ties toward
5530 // -infinity, causing the final sum Hi + Lo to be smaller. To
5531 // counteract this, we detect the tie case and override the rounding
5532 // mode for Lo to rmTowardPositive.
5533 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5534 const unsigned LoPrecision = getSecond().getSemantics().precision;
5535 if (ErrorActiveBits > LoPrecision) {
5536 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5537 // A tie occurs when the bits to be truncated are of the form 100...0.
5538 // This is detected by checking if the number of trailing zeros is
5539 // exactly one less than the number of bits being truncated.
5540 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5541 LoRM = rmTowardPositive;
5542 }
5543 } else if (RM == rmTowardZero) {
5544 // For rmTowardZero, the final positive result must be truncated (rounded
5545 // down). When Hi is an overestimate, Error is negative. A standard
5546 // rmTowardZero conversion of Error would make it *less* negative,
5547 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5548 // rounds down correctly, we force Lo to round toward -infinity.
5549 LoRM = rmTowardNegative;
5550 }
5551 }
5552
5553 APFloat Lo{getSecond().getSemantics()};
5554 opStatus Status = Lo.convertFromAPInt(Input: Error, /*IsSigned=*/true, RM: LoRM);
5555
5556 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5557 // components do not overlap. fastTwoSum performs this operation.
5558 std::tie(args&: Hi, args&: Lo) = fastTwoSum(X: Hi, Y: Lo);
5559 Floats[0] = std::move(Hi);
5560 Floats[1] = std::move(Lo);
5561
5562 // A final check for overflow is needed because fastTwoSum can cause a
5563 // carry-out from Lo that pushes Hi to infinity.
5564 if (!getFirst().isFinite())
5565 return handleOverflow(RM);
5566
5567 // The largest DoubleAPFloat must be canonical. Values which are larger are
5568 // not canonical and are equivalent to overflow.
5569 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5570 DoubleAPFloat Largest{*Semantics};
5571 Largest.makeLargest(/*Neg=*/false);
5572 if (compare(RHS: Largest) == APFloat::cmpGreaterThan)
5573 return handleOverflow(RM);
5574 }
5575
5576 // The final status of the operation is determined by the conversion of the
5577 // error term. If Lo could represent Error exactly, the entire conversion
5578 // is exact. Otherwise, it's inexact.
5579 return Status;
5580}
5581
5582APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5583 bool IsSigned,
5584 roundingMode RM) {
5585 const bool NegateInput = IsSigned && Input.isNegative();
5586 APInt API = Input;
5587 if (NegateInput)
5588 API.negate();
5589
5590 const APFloat::opStatus Status =
5591 convertFromUnsignedParts(Src: API.getRawData(), SrcCount: API.getNumWords(), RM);
5592 if (NegateInput)
5593 changeSign();
5594 return Status;
5595}
5596
5597unsigned int DoubleAPFloat::convertToHexString(char *DST,
5598 unsigned int HexDigits,
5599 bool UpperCase,
5600 roundingMode RM) const {
5601 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5602 "Unexpected Semantics");
5603 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5604 .convertToHexString(DST, HexDigits, UpperCase, RM);
5605}
5606
5607bool DoubleAPFloat::isDenormal() const {
5608 return getCategory() == fcNormal &&
5609 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5610 // (double)(Hi + Lo) == Hi defines a normal number.
5611 Floats[0] != Floats[0] + Floats[1]);
5612}
5613
5614bool DoubleAPFloat::isSmallest() const {
5615 if (getCategory() != fcNormal)
5616 return false;
5617 DoubleAPFloat Tmp(*this);
5618 Tmp.makeSmallest(Neg: this->isNegative());
5619 return Tmp.compare(RHS: *this) == cmpEqual;
5620}
5621
5622bool DoubleAPFloat::isSmallestNormalized() const {
5623 if (getCategory() != fcNormal)
5624 return false;
5625
5626 DoubleAPFloat Tmp(*this);
5627 Tmp.makeSmallestNormalized(Neg: this->isNegative());
5628 return Tmp.compare(RHS: *this) == cmpEqual;
5629}
5630
5631bool DoubleAPFloat::isLargest() const {
5632 if (getCategory() != fcNormal)
5633 return false;
5634 DoubleAPFloat Tmp(*this);
5635 Tmp.makeLargest(Neg: this->isNegative());
5636 return Tmp.compare(RHS: *this) == cmpEqual;
5637}
5638
5639bool DoubleAPFloat::isInteger() const {
5640 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5641 "Unexpected Semantics");
5642 return Floats[0].isInteger() && Floats[1].isInteger();
5643}
5644
5645void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5646 unsigned FormatPrecision,
5647 unsigned FormatMaxPadding,
5648 bool TruncateZero) const {
5649 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5650 "Unexpected Semantics");
5651 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5652 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5653}
5654
5655int DoubleAPFloat::getExactLog2Abs() const {
5656 // In order for Hi + Lo to be a power of two, the following must be true:
5657 // 1. Hi must be a power of two.
5658 // 2. Lo must be zero.
5659 if (getSecond().isNonZero())
5660 return INT_MIN;
5661 return getFirst().getExactLog2Abs();
5662}
5663
5664int ilogb(const DoubleAPFloat &Arg) {
5665 const APFloat &Hi = Arg.getFirst();
5666 const APFloat &Lo = Arg.getSecond();
5667 int IlogbResult = ilogb(Arg: Hi);
5668 // Zero and non-finite values can delegate to ilogb(Hi).
5669 if (Arg.getCategory() != fcNormal)
5670 return IlogbResult;
5671 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5672 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5673 return IlogbResult;
5674 if (Hi.getExactLog2Abs() == INT_MIN)
5675 return IlogbResult;
5676 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5677 // get nudged out of the binade by the low component.
5678 return IlogbResult - 1;
5679}
5680
5681DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5682 APFloat::roundingMode RM) {
5683 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5684 "Unexpected Semantics");
5685 return DoubleAPFloat(APFloatBase::PPCDoubleDouble(),
5686 scalbn(X: Arg.Floats[0], Exp, RM),
5687 scalbn(X: Arg.Floats[1], Exp, RM));
5688}
5689
5690DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5691 APFloat::roundingMode RM) {
5692 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5693 "Unexpected Semantics");
5694
5695 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5696 // [1.0, 2.0).
5697 Exp = ilogb(Arg);
5698
5699 // For NaNs, quiet any signaling NaN and return the result, as per standard
5700 // practice.
5701 if (Exp == APFloat::IEK_NaN) {
5702 DoubleAPFloat Quiet{Arg};
5703 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5704 return Quiet;
5705 }
5706
5707 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5708 if (Exp == APFloat::IEK_Inf)
5709 return Arg;
5710
5711 // For zero, the fraction is zero and the standard requires the exponent be 0.
5712 if (Exp == APFloat::IEK_Zero) {
5713 Exp = 0;
5714 return Arg;
5715 }
5716
5717 const APFloat &Hi = Arg.getFirst();
5718 const APFloat &Lo = Arg.getSecond();
5719
5720 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5721 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5722 // Increment the exponent to ensure the fraction is in the correct range.
5723 ++Exp;
5724
5725 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5726 APFloat Second = Lo;
5727 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5728 roundingMode LoRoundingMode;
5729 // The interpretation of rmTowardZero depends on the sign of the combined
5730 // Arg rather than the sign of the component.
5731 if (RM == rmTowardZero)
5732 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5733 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5734 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5735 // "away from zero" based on its own sign would move the value in the
5736 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5737 // direction-agnostic. We only need to bother with this if Lo is scaled
5738 // down.
5739 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5740 LoRoundingMode = rmNearestTiesToEven;
5741 else
5742 LoRoundingMode = RM;
5743 Second = scalbn(X: Lo, Exp: -Exp, RM: LoRoundingMode);
5744 // The rmNearestTiesToEven proxy is correct most of the time, but it
5745 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5746 // exact midpoint.
5747 // NOTE: This is morally equivalent to roundTiesTowardZero.
5748 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5749 // Re-scale the result back to check if rounding occurred.
5750 const APFloat RecomposedLo = scalbn(X: Second, Exp, RM: rmNearestTiesToEven);
5751 if (RecomposedLo != Lo) {
5752 // RoundingError tells us which direction we rounded:
5753 // - RoundingError > 0: we rounded up.
5754 // - RoundingError < 0: we down up.
5755 const APFloat RoundingError = RecomposedLo - Lo;
5756 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5757 // We do this by checking if the absolute rounding error is exactly
5758 // half a ULP of the result.
5759 const APFloat UlpOfSecond = harrisonUlp(X: Second);
5760 const APFloat ScaledUlpOfSecond =
5761 scalbn(X: UlpOfSecond, Exp: Exp - 1, RM: rmNearestTiesToEven);
5762 const bool IsMidpoint = abs(X: RoundingError) == ScaledUlpOfSecond;
5763 const bool RoundedLoAway =
5764 Second.isNegative() == RoundingError.isNegative();
5765 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5766 // decrease the magnitude of Second to increase the magnitude
5767 // First+Second.
5768 if (IsMidpoint && RoundedLoAway)
5769 Second.next(/*nextDown=*/!Second.isNegative());
5770 }
5771 }
5772 // Handle a tricky edge case where Arg is slightly less than a power of two
5773 // (e.g., Arg = 2^k - epsilon). In this situation:
5774 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5775 // 2. ilogb(Arg) correctly returns k-1.
5776 // 3. Our initial Exp becomes (k-1) + 1 = k.
5777 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5778 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5779 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5780 // We detect this specific case by checking if Hi is a power of two and if
5781 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5782 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5783 // valid fraction.
5784 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5785 ++Exp;
5786 }
5787
5788 APFloat First = scalbn(X: Hi, Exp: -Exp, RM);
5789 return DoubleAPFloat(APFloatBase::PPCDoubleDouble(), std::move(First),
5790 std::move(Second));
5791}
5792
5793APInt DoubleAPFloat::getNaNPayload() const { return Floats[0].getNaNPayload(); }
5794} // namespace detail
5795
5796APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5797 if (usesLayout<IEEEFloat>(Semantics)) {
5798 new (&IEEE) IEEEFloat(std::move(F));
5799 return;
5800 }
5801 if (usesLayout<DoubleAPFloat>(Semantics)) {
5802 const fltSemantics& S = F.getSemantics();
5803 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5804 APFloat(APFloatBase::IEEEdouble()));
5805 return;
5806 }
5807 llvm_unreachable("Unexpected semantics");
5808}
5809
5810Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5811 roundingMode RM) {
5812 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5813}
5814
5815hash_code hash_value(const APFloat &Arg) {
5816 if (APFloat::usesLayout<detail::IEEEFloat>(Semantics: Arg.getSemantics()))
5817 return hash_value(Arg: Arg.U.IEEE);
5818 if (APFloat::usesLayout<detail::DoubleAPFloat>(Semantics: Arg.getSemantics()))
5819 return hash_value(Arg: Arg.U.Double);
5820 llvm_unreachable("Unexpected semantics");
5821}
5822
5823APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5824 : APFloat(Semantics) {
5825 auto StatusOrErr = convertFromString(Str: S, RM: rmNearestTiesToEven);
5826 assert(StatusOrErr && "Invalid floating point representation");
5827 consumeError(Err: StatusOrErr.takeError());
5828}
5829
5830FPClassTest APFloat::classify() const {
5831 if (isZero())
5832 return isNegative() ? fcNegZero : fcPosZero;
5833 if (isNormal())
5834 return isNegative() ? fcNegNormal : fcPosNormal;
5835 if (isDenormal())
5836 return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5837 if (isInfinity())
5838 return isNegative() ? fcNegInf : fcPosInf;
5839 assert(isNaN() && "Other class of FP constant");
5840 return isSignaling() ? fcSNan : fcQNan;
5841}
5842
5843bool APFloat::getExactInverse(APFloat *Inv) const {
5844 // Only finite, non-zero numbers can have a useful, representable inverse.
5845 // This check filters out +/- zero, +/- infinity, and NaN.
5846 if (!isFiniteNonZero())
5847 return false;
5848
5849 // Historically, this function rejects subnormal inputs. One reason why this
5850 // might be important is that subnormals may behave differently under FTZ/DAZ
5851 // runtime behavior.
5852 if (isDenormal())
5853 return false;
5854
5855 // A number has an exact, representable inverse if and only if it is a power
5856 // of two.
5857 //
5858 // Mathematical Rationale:
5859 // 1. A binary floating-point number x is a dyadic rational, meaning it can
5860 // be written as x = M / 2^k for integers M (the significand) and k.
5861 // 2. The inverse is 1/x = 2^k / M.
5862 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
5863 // in binary), its denominator M must also be a power of two.
5864 // Let's say M = 2^m.
5865 // 4. Substituting this back into the formula for x, we get
5866 // x = (2^m) / (2^k) = 2^(m-k).
5867 //
5868 // This proves that x must be a power of two.
5869
5870 // getExactLog2Abs() returns the integer exponent if the number is a power of
5871 // two or INT_MIN if it is not.
5872 const int Exp = getExactLog2Abs();
5873 if (Exp == INT_MIN)
5874 return false;
5875
5876 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
5877 // scaling 1.0 by the negated exponent.
5878 APFloat Reciprocal =
5879 scalbn(X: APFloat::getOne(Sem: getSemantics(), /*Negative=*/isNegative()), Exp: -Exp,
5880 RM: rmTowardZero);
5881
5882 // scalbn might round if the resulting exponent -Exp is outside the
5883 // representable range, causing overflow (to infinity) or underflow. We
5884 // must verify that the result is still the exact power of two we expect.
5885 if (Reciprocal.getExactLog2Abs() != -Exp)
5886 return false;
5887
5888 // Avoid multiplication with a subnormal, it is not safe on all platforms and
5889 // may be slower than a normal division.
5890 if (Reciprocal.isDenormal())
5891 return false;
5892
5893 assert(Reciprocal.isFiniteNonZero());
5894
5895 if (Inv)
5896 *Inv = std::move(Reciprocal);
5897
5898 return true;
5899}
5900
5901APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5902 roundingMode RM, bool *losesInfo) {
5903 if (&getSemantics() == &ToSemantics) {
5904 *losesInfo = false;
5905 return opOK;
5906 }
5907 if (usesLayout<IEEEFloat>(Semantics: getSemantics()) &&
5908 usesLayout<IEEEFloat>(Semantics: ToSemantics))
5909 return U.IEEE.convert(toSemantics: ToSemantics, rounding_mode: RM, losesInfo);
5910 if (usesLayout<IEEEFloat>(Semantics: getSemantics()) &&
5911 usesLayout<DoubleAPFloat>(Semantics: ToSemantics)) {
5912 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
5913 auto Ret =
5914 U.IEEE.convert(toSemantics: APFloatBase::semPPCDoubleDoubleLegacy, rounding_mode: RM, losesInfo);
5915 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5916 return Ret;
5917 }
5918 if (usesLayout<DoubleAPFloat>(Semantics: getSemantics()) &&
5919 usesLayout<IEEEFloat>(Semantics: ToSemantics)) {
5920 auto Ret = getIEEE().convert(toSemantics: ToSemantics, rounding_mode: RM, losesInfo);
5921 *this = APFloat(std::move(getIEEE()), ToSemantics);
5922 return Ret;
5923 }
5924 llvm_unreachable("Unexpected semantics");
5925}
5926
5927APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5928 return APFloat(Semantics, APInt::getAllOnes(numBits: Semantics.sizeInBits));
5929}
5930
5931void APFloat::print(raw_ostream &OS) const {
5932 SmallVector<char, 16> Buffer;
5933 toString(Str&: Buffer);
5934 OS << Buffer;
5935}
5936
5937#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5938LLVM_DUMP_METHOD void APFloat::dump() const {
5939 print(dbgs());
5940 dbgs() << '\n';
5941}
5942#endif
5943
5944void APFloat::Profile(FoldingSetNodeID &NID) const {
5945 NID.Add(x: bitcastToAPInt());
5946}
5947
5948APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5949 roundingMode rounding_mode,
5950 bool *isExact) const {
5951 unsigned bitWidth = result.getBitWidth();
5952 SmallVector<uint64_t, 4> parts(result.getNumWords());
5953 opStatus status = convertToInteger(Input: parts, Width: bitWidth, IsSigned: result.isSigned(),
5954 RM: rounding_mode, IsExact: isExact);
5955 // Keeps the original signed-ness.
5956 result = APInt(bitWidth, parts);
5957 return status;
5958}
5959
5960double APFloat::convertToDouble() const {
5961 if (&getSemantics() == &APFloatBase::semIEEEdouble)
5962 return getIEEE().convertToDouble();
5963 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
5964 "Float semantics is not representable by IEEEdouble");
5965 APFloat Temp = *this;
5966 bool LosesInfo;
5967 [[maybe_unused]] opStatus St =
5968 Temp.convert(ToSemantics: APFloatBase::semIEEEdouble, RM: rmNearestTiesToEven, losesInfo: &LosesInfo);
5969 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5970 return Temp.getIEEE().convertToDouble();
5971}
5972
5973#ifdef HAS_IEE754_FLOAT128
5974float128 APFloat::convertToQuad() const {
5975 if (&getSemantics() == &APFloatBase::semIEEEquad)
5976 return getIEEE().convertToQuad();
5977 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
5978 "Float semantics is not representable by IEEEquad");
5979 APFloat Temp = *this;
5980 bool LosesInfo;
5981 [[maybe_unused]] opStatus St =
5982 Temp.convert(ToSemantics: APFloatBase::semIEEEquad, RM: rmNearestTiesToEven, losesInfo: &LosesInfo);
5983 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5984 return Temp.getIEEE().convertToQuad();
5985}
5986#endif
5987
5988float APFloat::convertToFloat() const {
5989 if (&getSemantics() == &APFloatBase::semIEEEsingle)
5990 return getIEEE().convertToFloat();
5991 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
5992 "Float semantics is not representable by IEEEsingle");
5993 APFloat Temp = *this;
5994 bool LosesInfo;
5995 [[maybe_unused]] opStatus St =
5996 Temp.convert(ToSemantics: APFloatBase::semIEEEsingle, RM: rmNearestTiesToEven, losesInfo: &LosesInfo);
5997 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5998 return Temp.getIEEE().convertToFloat();
5999}
6000
6001bool APFloatBase::isValidArbitraryFPFormat(StringRef Format) {
6002 static constexpr StringLiteral ValidFormats[] = {
6003 "Float8E5M2", "Float8E5M2FNUZ", "Float8E4M3", "Float8E4M3FN",
6004 "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
6005 "Float6E3M2FN", "Float6E2M3FN", "Float4E2M1FN"};
6006 return llvm::is_contained(Range: ValidFormats, Element: Format);
6007}
6008
6009const fltSemantics *APFloatBase::getArbitraryFPSemantics(StringRef Format) {
6010 // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
6011 // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
6012 return StringSwitch<const fltSemantics *>(Format)
6013 .Case(S: "Float8E5M2", Value: &semFloat8E5M2)
6014 .Case(S: "Float8E4M3FN", Value: &semFloat8E4M3FN)
6015 .Case(S: "Float4E2M1FN", Value: &semFloat4E2M1FN)
6016 .Case(S: "Float6E3M2FN", Value: &semFloat6E3M2FN)
6017 .Case(S: "Float6E2M3FN", Value: &semFloat6E2M3FN)
6018 .Default(Value: nullptr);
6019}
6020
6021APFloat::Storage::~Storage() {
6022 if (usesLayout<IEEEFloat>(Semantics: *semantics)) {
6023 IEEE.~IEEEFloat();
6024 return;
6025 }
6026 if (usesLayout<DoubleAPFloat>(Semantics: *semantics)) {
6027 Double.~DoubleAPFloat();
6028 return;
6029 }
6030 llvm_unreachable("Unexpected semantics");
6031}
6032
6033APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6034 if (usesLayout<IEEEFloat>(Semantics: *RHS.semantics)) {
6035 new (this) IEEEFloat(RHS.IEEE);
6036 return;
6037 }
6038 if (usesLayout<DoubleAPFloat>(Semantics: *RHS.semantics)) {
6039 new (this) DoubleAPFloat(RHS.Double);
6040 return;
6041 }
6042 llvm_unreachable("Unexpected semantics");
6043}
6044
6045APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6046 if (usesLayout<IEEEFloat>(Semantics: *RHS.semantics)) {
6047 new (this) IEEEFloat(std::move(RHS.IEEE));
6048 return;
6049 }
6050 if (usesLayout<DoubleAPFloat>(Semantics: *RHS.semantics)) {
6051 new (this) DoubleAPFloat(std::move(RHS.Double));
6052 return;
6053 }
6054 llvm_unreachable("Unexpected semantics");
6055}
6056
6057APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6058 if (usesLayout<IEEEFloat>(Semantics: *semantics) &&
6059 usesLayout<IEEEFloat>(Semantics: *RHS.semantics)) {
6060 IEEE = RHS.IEEE;
6061 } else if (usesLayout<DoubleAPFloat>(Semantics: *semantics) &&
6062 usesLayout<DoubleAPFloat>(Semantics: *RHS.semantics)) {
6063 Double = RHS.Double;
6064 } else if (this != &RHS) {
6065 this->~Storage();
6066 new (this) Storage(RHS);
6067 }
6068 return *this;
6069}
6070
6071APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6072 if (usesLayout<IEEEFloat>(Semantics: *semantics) &&
6073 usesLayout<IEEEFloat>(Semantics: *RHS.semantics)) {
6074 IEEE = std::move(RHS.IEEE);
6075 } else if (usesLayout<DoubleAPFloat>(Semantics: *semantics) &&
6076 usesLayout<DoubleAPFloat>(Semantics: *RHS.semantics)) {
6077 Double = std::move(RHS.Double);
6078 } else if (this != &RHS) {
6079 this->~Storage();
6080 new (this) Storage(std::move(RHS));
6081 }
6082 return *this;
6083}
6084
6085namespace {
6086
6087APFloat::opStatus getOpStatusFromLibc(int libc_exceptions) {
6088 APFloat::opStatus status = APFloat::opOK;
6089 if (libc_exceptions & FE_INVALID)
6090 status = static_cast<APFloat::opStatus>(status | APFloat::opInvalidOp);
6091 if (libc_exceptions & FE_DIVBYZERO)
6092 status = static_cast<APFloat::opStatus>(status | APFloat::opDivByZero);
6093 if (libc_exceptions & FE_OVERFLOW)
6094 status = static_cast<APFloat::opStatus>(status | APFloat::opOverflow);
6095 if (libc_exceptions & FE_UNDERFLOW)
6096 status = static_cast<APFloat::opStatus>(status | APFloat::opUnderflow);
6097 if (libc_exceptions & FE_INEXACT)
6098 status = static_cast<APFloat::opStatus>(status | APFloat::opInexact);
6099 return status;
6100}
6101
6102} // namespace
6103
6104// TODO: Support other rounding modes when LLVM libc math implement static
6105// roundings.
6106std::optional<APFloat> exp(const APFloat &x, RoundingMode rounding_mode,
6107 APFloat::opStatus *status) {
6108
6109 if (rounding_mode == APFloatBase::rmNearestTiesToEven) {
6110 if (APFloat::SemanticsToEnum(Sem: x.getSemantics()) ==
6111 APFloatBase::S_IEEEsingle) {
6112 float x_val = x.convertToFloat();
6113 int exc =
6114 LIBC_NAMESPACE::shared::check::exp_exceptions(x: x_val, FE_TONEAREST);
6115 if (status) {
6116 *status = getOpStatusFromLibc(libc_exceptions: exc);
6117 if (x.isSignaling()) {
6118 // 32-bit x86 will silence sNaN when loading floats, so we explicitly
6119 // add the INVALID exception here.
6120 *status =
6121 static_cast<APFloat::opStatus>(*status | APFloat::opInvalidOp);
6122 }
6123 }
6124 float result = LIBC_NAMESPACE::shared::expf(x: x_val);
6125 return APFloat(result);
6126 }
6127 if (APFloat::SemanticsToEnum(Sem: x.getSemantics()) ==
6128 APFloatBase::S_IEEEdouble) {
6129 double x_val = x.convertToDouble();
6130 int exc =
6131 LIBC_NAMESPACE::shared::check::exp_exceptions(x: x_val, FE_TONEAREST);
6132 if (status) {
6133 *status = getOpStatusFromLibc(libc_exceptions: exc);
6134 if (x.isSignaling()) {
6135 // 32-bit x86 will silence sNaN when loading floats, so we explicitly
6136 // add the INVALID exception here.
6137 *status =
6138 static_cast<APFloat::opStatus>(*status | APFloat::opInvalidOp);
6139 }
6140 }
6141 double result = LIBC_NAMESPACE::shared::exp(x: x_val);
6142 return APFloat(result);
6143 }
6144 }
6145 return std::nullopt;
6146}
6147
6148} // namespace llvm
6149
6150#undef APFLOAT_DISPATCH_ON_SEMANTICS
6151