1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/FloatingPointMode.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/Error.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28#include <cstring>
29#include <limits.h>
30
/// Forward METHOD_CALL to the active member of the storage union `U`.
///
/// Expands to a statement that returns `U.IEEE.METHOD_CALL` when the current
/// semantics use the IEEEFloat layout and `U.Double.METHOD_CALL` when they use
/// the DoubleAPFloat layout; any other layout reaches llvm_unreachable.
/// Because the expansion contains `return`, it can only be used inside a
/// function where `U`, `getSemantics()` and `usesLayout` are in scope. The
/// do/while(false) wrapper makes the expansion behave as a single statement.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
39
40using namespace llvm;
41
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is `_lhs * 4 + _rhs`, so distinct pairs only map to distinct keys
/// while `_rhs` is in [0, 4). NOTE(review): presumably fcCategory has at most
/// four enumerators -- confirm against its declaration.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49
/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings.  One hexadecimal digit covers four bits, so a part
   must hold a whole number of hexadecimal digits.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54namespace llvm {
55
// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise.
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
  // Float4E2M1FN types, which do not support Inf or NaN values.
  FiniteOnly,
};
76
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
101
/* Represents floating point arithmetic semantics.

   This is an aggregate: the first four members have no default and must be
   supplied by every initializer; the remaining members default to the
   IEEE-style values given below.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  /* How (and whether) Inf and NaN are represented.  */
  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  /* Which bit pattern encodes NaN.  */
  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;

  /* Whether this semantics has an encoding for Zero */
  bool hasZero = true;

  /* Whether this semantics can represent signed values */
  bool hasSignedRepr = true;

  /* Whether the sign bit of this semantics is the most significant bit */
  bool hasSignBitInMSB = true;
};
132
133static constexpr fltSemantics semIEEEhalf = {.maxExponent: 15, .minExponent: -14, .precision: 11, .sizeInBits: 16};
134static constexpr fltSemantics semBFloat = {.maxExponent: 127, .minExponent: -126, .precision: 8, .sizeInBits: 16};
135static constexpr fltSemantics semIEEEsingle = {.maxExponent: 127, .minExponent: -126, .precision: 24, .sizeInBits: 32};
136static constexpr fltSemantics semIEEEdouble = {.maxExponent: 1023, .minExponent: -1022, .precision: 53, .sizeInBits: 64};
137static constexpr fltSemantics semIEEEquad = {.maxExponent: 16383, .minExponent: -16382, .precision: 113, .sizeInBits: 128};
138static constexpr fltSemantics semFloat8E5M2 = {.maxExponent: 15, .minExponent: -14, .precision: 3, .sizeInBits: 8};
139static constexpr fltSemantics semFloat8E5M2FNUZ = {
140 .maxExponent: 15, .minExponent: -15, .precision: 3, .sizeInBits: 8, .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly, .nanEncoding: fltNanEncoding::NegativeZero};
141static constexpr fltSemantics semFloat8E4M3 = {.maxExponent: 7, .minExponent: -6, .precision: 4, .sizeInBits: 8};
142static constexpr fltSemantics semFloat8E4M3FN = {
143 .maxExponent: 8, .minExponent: -6, .precision: 4, .sizeInBits: 8, .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly, .nanEncoding: fltNanEncoding::AllOnes};
144static constexpr fltSemantics semFloat8E4M3FNUZ = {
145 .maxExponent: 7, .minExponent: -7, .precision: 4, .sizeInBits: 8, .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly, .nanEncoding: fltNanEncoding::NegativeZero};
146static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
147 .maxExponent: 4, .minExponent: -10, .precision: 4, .sizeInBits: 8, .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly, .nanEncoding: fltNanEncoding::NegativeZero};
148static constexpr fltSemantics semFloat8E3M4 = {.maxExponent: 3, .minExponent: -2, .precision: 5, .sizeInBits: 8};
149static constexpr fltSemantics semFloatTF32 = {.maxExponent: 127, .minExponent: -126, .precision: 11, .sizeInBits: 19};
150static constexpr fltSemantics semFloat8E8M0FNU = {.maxExponent: 127,
151 .minExponent: -127,
152 .precision: 1,
153 .sizeInBits: 8,
154 .nonFiniteBehavior: fltNonfiniteBehavior::NanOnly,
155 .nanEncoding: fltNanEncoding::AllOnes,
156 .hasZero: false,
157 .hasSignedRepr: false,
158 .hasSignBitInMSB: false};
159
160static constexpr fltSemantics semFloat6E3M2FN = {
161 .maxExponent: 4, .minExponent: -2, .precision: 3, .sizeInBits: 6, .nonFiniteBehavior: fltNonfiniteBehavior::FiniteOnly};
162static constexpr fltSemantics semFloat6E2M3FN = {
163 .maxExponent: 2, .minExponent: 0, .precision: 4, .sizeInBits: 6, .nonFiniteBehavior: fltNonfiniteBehavior::FiniteOnly};
164static constexpr fltSemantics semFloat4E2M1FN = {
165 .maxExponent: 2, .minExponent: 0, .precision: 2, .sizeInBits: 4, .nonFiniteBehavior: fltNonfiniteBehavior::FiniteOnly};
166static constexpr fltSemantics semX87DoubleExtended = {.maxExponent: 16383, .minExponent: -16382, .precision: 64, .sizeInBits: 80};
167static constexpr fltSemantics semBogus = {.maxExponent: 0, .minExponent: 0, .precision: 0, .sizeInBits: 0};
168static constexpr fltSemantics semPPCDoubleDouble = {.maxExponent: -1, .minExponent: 0, .precision: 0, .sizeInBits: 128};
169static constexpr fltSemantics semPPCDoubleDoubleLegacy = {.maxExponent: 1023, .minExponent: -1022 + 53,
170 .precision: 53 + 53, .sizeInBits: 128};
171
172const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
173 switch (S) {
174 case S_IEEEhalf:
175 return IEEEhalf();
176 case S_BFloat:
177 return BFloat();
178 case S_IEEEsingle:
179 return IEEEsingle();
180 case S_IEEEdouble:
181 return IEEEdouble();
182 case S_IEEEquad:
183 return IEEEquad();
184 case S_PPCDoubleDouble:
185 return PPCDoubleDouble();
186 case S_PPCDoubleDoubleLegacy:
187 return PPCDoubleDoubleLegacy();
188 case S_Float8E5M2:
189 return Float8E5M2();
190 case S_Float8E5M2FNUZ:
191 return Float8E5M2FNUZ();
192 case S_Float8E4M3:
193 return Float8E4M3();
194 case S_Float8E4M3FN:
195 return Float8E4M3FN();
196 case S_Float8E4M3FNUZ:
197 return Float8E4M3FNUZ();
198 case S_Float8E4M3B11FNUZ:
199 return Float8E4M3B11FNUZ();
200 case S_Float8E3M4:
201 return Float8E3M4();
202 case S_FloatTF32:
203 return FloatTF32();
204 case S_Float8E8M0FNU:
205 return Float8E8M0FNU();
206 case S_Float6E3M2FN:
207 return Float6E3M2FN();
208 case S_Float6E2M3FN:
209 return Float6E2M3FN();
210 case S_Float4E2M1FN:
211 return Float4E2M1FN();
212 case S_x87DoubleExtended:
213 return x87DoubleExtended();
214 }
215 llvm_unreachable("Unrecognised floating semantics");
216}
217
218APFloatBase::Semantics
219APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
220 if (&Sem == &llvm::APFloat::IEEEhalf())
221 return S_IEEEhalf;
222 else if (&Sem == &llvm::APFloat::BFloat())
223 return S_BFloat;
224 else if (&Sem == &llvm::APFloat::IEEEsingle())
225 return S_IEEEsingle;
226 else if (&Sem == &llvm::APFloat::IEEEdouble())
227 return S_IEEEdouble;
228 else if (&Sem == &llvm::APFloat::IEEEquad())
229 return S_IEEEquad;
230 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
231 return S_PPCDoubleDouble;
232 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
233 return S_PPCDoubleDoubleLegacy;
234 else if (&Sem == &llvm::APFloat::Float8E5M2())
235 return S_Float8E5M2;
236 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
237 return S_Float8E5M2FNUZ;
238 else if (&Sem == &llvm::APFloat::Float8E4M3())
239 return S_Float8E4M3;
240 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
241 return S_Float8E4M3FN;
242 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
243 return S_Float8E4M3FNUZ;
244 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
245 return S_Float8E4M3B11FNUZ;
246 else if (&Sem == &llvm::APFloat::Float8E3M4())
247 return S_Float8E3M4;
248 else if (&Sem == &llvm::APFloat::FloatTF32())
249 return S_FloatTF32;
250 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
251 return S_Float8E8M0FNU;
252 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
253 return S_Float6E3M2FN;
254 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
255 return S_Float6E2M3FN;
256 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
257 return S_Float4E2M1FN;
258 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
259 return S_x87DoubleExtended;
260 else
261 llvm_unreachable("Unknown floating semantics");
262}
263
// Accessors for the file-local semantics tables. Each returns a reference to
// the corresponding static constexpr object, so the address of the result is
// stable and can be used to identify the semantics (see SemanticsToEnum).
const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
const fltSemantics &APFloatBase::PPCDoubleDouble() {
  return semPPCDoubleDouble;
}
const fltSemantics &APFloatBase::PPCDoubleDoubleLegacy() {
  return semPPCDoubleDoubleLegacy;
}
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
  return semFloat8E4M3B11FNUZ;
}
const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; }
const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; }
const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }
const fltSemantics &APFloatBase::x87DoubleExtended() {
  return semX87DoubleExtended;
}
// Note: Bogus has no case in EnumToSemantics / SemanticsToEnum.
const fltSemantics &APFloatBase::Bogus() { return semBogus; }
293
294bool APFloatBase::isRepresentableBy(const fltSemantics &A,
295 const fltSemantics &B) {
296 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
297 A.precision <= B.precision;
298}
299
// Out-of-line definitions of the constexpr static rounding-mode members.
// These give the members a namespace-scope definition for odr-uses; since
// C++17 constexpr static data members are implicitly inline, which makes
// these redeclarations redundant (but still permitted).
constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
constexpr RoundingMode APFloatBase::rmTowardPositive;
constexpr RoundingMode APFloatBase::rmTowardNegative;
constexpr RoundingMode APFloatBase::rmTowardZero;
constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
305
/* A tight upper bound on number of parts required to hold the value
   pow(5, power) is

     power * 815 / (351 * integerPartWidth) + 1

   However, whilst the result may require only this many parts,
   because we are multiplying two values to get it, the
   multiplication may require an extra part with the excess part
   being zero (consider the trivial case of 1 * 1, tcFullMultiply
   requires two parts to hold the single-part result).  So we add an
   extra one to guarantee enough space whilst multiplying (hence the
   "2 +" below rather than "+ 1").

   maxExponent and maxPrecision equal the largest maxExponent (16383) and
   precision (113) that appear in the semantics tables of this file.  */
const unsigned int maxExponent = 16383;
const unsigned int maxPrecision = 113;
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
const unsigned int maxPowerOfFiveParts =
    2 +
    ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
323
// Plain field accessors: each returns the like-named member of the given
// fltSemantics unchanged.

/// Number of significand bits, including the integer bit.
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
  return semantics.precision;
}
/// Largest E such that 2^E is representable.
APFloatBase::ExponentType
APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
  return semantics.maxExponent;
}
/// Smallest E such that 2^E is a normalized number.
APFloatBase::ExponentType
APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
  return semantics.minExponent;
}
/// Number of bits actually used by the format.
unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
  return semantics.sizeInBits;
}
338unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
339 bool isSigned) {
340 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
341 // at least one more bit than the MaxExponent to hold the max FP value.
342 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
343 // Extra sign bit needed.
344 if (isSigned)
345 ++MinBitWidth;
346 return MinBitWidth;
347}
348
/// True if the format has an encoding for zero.
bool APFloatBase::semanticsHasZero(const fltSemantics &semantics) {
  return semantics.hasZero;
}

/// True if the format can represent signed values.
bool APFloatBase::semanticsHasSignedRepr(const fltSemantics &semantics) {
  return semantics.hasSignedRepr;
}

/// True only for IEEE754 non-finite behavior; NanOnly and FiniteOnly formats
/// have no Inf.
bool APFloatBase::semanticsHasInf(const fltSemantics &semantics) {
  return semantics.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754;
}

/// True for every non-finite behavior except FiniteOnly.
bool APFloatBase::semanticsHasNaN(const fltSemantics &semantics) {
  return semantics.nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly;
}
364
365bool APFloatBase::isIEEELikeFP(const fltSemantics &semantics) {
366 // Keep in sync with Type::isIEEELikeFPTy
367 return SemanticsToEnum(Sem: semantics) <= S_IEEEquad;
368}
369
/// True if the sign bit of the format is its most significant bit.
bool APFloatBase::hasSignBitInMSB(const fltSemantics &semantics) {
  return semantics.hasSignBitInMSB;
}
373
374bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
375 const fltSemantics &Dst) {
376 // Exponent range must be larger.
377 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
378 return false;
379
380 // If the mantissa is long enough, the result value could still be denormal
381 // with a larger exponent range.
382 //
383 // FIXME: This condition is probably not accurate but also shouldn't be a
384 // practical concern with existing types.
385 return Dst.precision >= Src.precision;
386}
387
/// Return the sizeInBits member of \p Sem (same value as
/// semanticsSizeInBits).
unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
  return Sem.sizeInBits;
}
391
/// Exponent value one below the smallest normalized exponent; the name
/// suggests it marks zero (NOTE(review): confirm at the use sites).
static constexpr APFloatBase::ExponentType
exponentZero(const fltSemantics &semantics) {
  return semantics.minExponent - 1;
}

/// Exponent value one above the largest finite exponent; the name suggests it
/// marks infinity (NOTE(review): confirm at the use sites).
static constexpr APFloatBase::ExponentType
exponentInf(const fltSemantics &semantics) {
  return semantics.maxExponent + 1;
}
401
402static constexpr APFloatBase::ExponentType
403exponentNaN(const fltSemantics &semantics) {
404 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
405 if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
406 return exponentZero(semantics);
407 if (semantics.hasSignedRepr)
408 return semantics.maxExponent;
409 }
410 return semantics.maxExponent + 1;
411}
412
413/* A bunch of private, handy routines. */
414
415static inline Error createError(const Twine &Err) {
416 return make_error<StringError>(Args: Err, Args: inconvertibleErrorCode());
417}
418
419static constexpr inline unsigned int partCountForBits(unsigned int bits) {
420 return std::max(a: 1u, b: (bits + APFloatBase::integerPartWidth - 1) /
421 APFloatBase::integerPartWidth);
422}
423
/* Map a character code to its decimal digit value.  Returns 0U-9U for
   '0'-'9'.  Return values >= 10U are not digits: codes below '0' wrap
   around to large unsigned values.  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';
  return c - zero;
}
430
431/* Return the value of a decimal exponent of the form
432 [+-]ddddddd.
433
434 If the exponent overflows, returns a large exponent with the
435 appropriate sign. */
436static Expected<int> readExponent(StringRef::iterator begin,
437 StringRef::iterator end) {
438 bool isNegative;
439 unsigned int absExponent;
440 const unsigned int overlargeExponent = 24000; /* FIXME. */
441 StringRef::iterator p = begin;
442
443 // Treat no exponent as 0 to match binutils
444 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
445 return 0;
446 }
447
448 isNegative = (*p == '-');
449 if (*p == '-' || *p == '+') {
450 p++;
451 if (p == end)
452 return createError(Err: "Exponent has no digits");
453 }
454
455 absExponent = decDigitValue(c: *p++);
456 if (absExponent >= 10U)
457 return createError(Err: "Invalid character in exponent");
458
459 for (; p != end; ++p) {
460 unsigned int value;
461
462 value = decDigitValue(c: *p);
463 if (value >= 10U)
464 return createError(Err: "Invalid character in exponent");
465
466 absExponent = absExponent * 10U + value;
467 if (absExponent >= overlargeExponent) {
468 absExponent = overlargeExponent;
469 break;
470 }
471 }
472
473 if (isNegative)
474 return -(int) absExponent;
475 else
476 return (int) absExponent;
477}
478
479/* This is ugly and needs cleaning up, but I don't immediately see
480 how whilst remaining safe. */
481static Expected<int> totalExponent(StringRef::iterator p,
482 StringRef::iterator end,
483 int exponentAdjustment) {
484 int unsignedExponent;
485 bool negative, overflow;
486 int exponent = 0;
487
488 if (p == end)
489 return createError(Err: "Exponent has no digits");
490
491 negative = *p == '-';
492 if (*p == '-' || *p == '+') {
493 p++;
494 if (p == end)
495 return createError(Err: "Exponent has no digits");
496 }
497
498 unsignedExponent = 0;
499 overflow = false;
500 for (; p != end; ++p) {
501 unsigned int value;
502
503 value = decDigitValue(c: *p);
504 if (value >= 10U)
505 return createError(Err: "Invalid character in exponent");
506
507 unsignedExponent = unsignedExponent * 10 + value;
508 if (unsignedExponent > 32767) {
509 overflow = true;
510 break;
511 }
512 }
513
514 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
515 overflow = true;
516
517 if (!overflow) {
518 exponent = unsignedExponent;
519 if (negative)
520 exponent = -exponent;
521 exponent += exponentAdjustment;
522 if (exponent > 32767 || exponent < -32768)
523 overflow = true;
524 }
525
526 if (overflow)
527 exponent = negative ? -32768: 32767;
528
529 return exponent;
530}
531
532static Expected<StringRef::iterator>
533skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
534 StringRef::iterator *dot) {
535 StringRef::iterator p = begin;
536 *dot = end;
537 while (p != end && *p == '0')
538 p++;
539
540 if (p != end && *p == '.') {
541 *dot = p++;
542
543 if (end - begin == 1)
544 return createError(Err: "Significand has no digits");
545
546 while (p != end && *p == '0')
547 p++;
548 }
549
550 return p;
551}
552
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  /* First significant digit (after leading zeroes and any dot).  */
  const char *firstSigDigit;
  /* Last significant digit (before insignificant trailing zeroes).  */
  const char *lastSigDigit;
  /* Exponent when the significand is read as an integer.  */
  int exponent;
  /* Exponent when the point follows the first significant digit.  */
  int normalizedExponent;
};
572
573static Error interpretDecimal(StringRef::iterator begin,
574 StringRef::iterator end, decimalInfo *D) {
575 StringRef::iterator dot = end;
576
577 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, dot: &dot);
578 if (!PtrOrErr)
579 return PtrOrErr.takeError();
580 StringRef::iterator p = *PtrOrErr;
581
582 D->firstSigDigit = p;
583 D->exponent = 0;
584 D->normalizedExponent = 0;
585
586 for (; p != end; ++p) {
587 if (*p == '.') {
588 if (dot != end)
589 return createError(Err: "String contains multiple dots");
590 dot = p++;
591 if (p == end)
592 break;
593 }
594 if (decDigitValue(c: *p) >= 10U)
595 break;
596 }
597
598 if (p != end) {
599 if (*p != 'e' && *p != 'E')
600 return createError(Err: "Invalid character in significand");
601 if (p == begin)
602 return createError(Err: "Significand has no digits");
603 if (dot != end && p - begin == 1)
604 return createError(Err: "Significand has no digits");
605
606 /* p points to the first non-digit in the string */
607 auto ExpOrErr = readExponent(begin: p + 1, end);
608 if (!ExpOrErr)
609 return ExpOrErr.takeError();
610 D->exponent = *ExpOrErr;
611
612 /* Implied decimal point? */
613 if (dot == end)
614 dot = p;
615 }
616
617 /* If number is all zeroes accept any exponent. */
618 if (p != D->firstSigDigit) {
619 /* Drop insignificant trailing zeroes. */
620 if (p != begin) {
621 do
622 do
623 p--;
624 while (p != begin && *p == '0');
625 while (p != begin && *p == '.');
626 }
627
628 /* Adjust the exponents for any decimal point. */
629 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
630 D->normalizedExponent = (D->exponent +
631 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
632 - (dot > D->firstSigDigit && dot < p)));
633 }
634
635 D->lastSigDigit = p;
636 return Error::success();
637}
638
639/* Return the trailing fraction of a hexadecimal number.
640 DIGITVALUE is the first hex digit of the fraction, P points to
641 the next digit. */
642static Expected<lostFraction>
643trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
644 unsigned int digitValue) {
645 unsigned int hexDigit;
646
647 /* If the first trailing digit isn't 0 or 8 we can work out the
648 fraction immediately. */
649 if (digitValue > 8)
650 return lfMoreThanHalf;
651 else if (digitValue < 8 && digitValue > 0)
652 return lfLessThanHalf;
653
654 // Otherwise we need to find the first non-zero digit.
655 while (p != end && (*p == '0' || *p == '.'))
656 p++;
657
658 if (p == end)
659 return createError(Err: "Invalid trailing hexadecimal fraction!");
660
661 hexDigit = hexDigitValue(C: *p);
662
663 /* If we ran off the end it is exactly zero or one-half, otherwise
664 a little more. */
665 if (hexDigit == UINT_MAX)
666 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
667 else
668 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
669}
670
671/* Return the fraction lost were a bignum truncated losing the least
672 significant BITS bits. */
673static lostFraction
674lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
675 unsigned int partCount,
676 unsigned int bits)
677{
678 unsigned int lsb;
679
680 lsb = APInt::tcLSB(parts, n: partCount);
681
682 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
683 if (bits <= lsb)
684 return lfExactlyZero;
685 if (bits == lsb + 1)
686 return lfExactlyHalf;
687 if (bits <= partCount * APFloatBase::integerPartWidth &&
688 APInt::tcExtractBit(parts, bit: bits - 1))
689 return lfMoreThanHalf;
690
691 return lfLessThanHalf;
692}
693
694/* Shift DST right BITS bits noting lost fraction. */
695static lostFraction
696shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
697{
698 lostFraction lost_fraction;
699
700 lost_fraction = lostFractionThroughTruncation(parts: dst, partCount: parts, bits);
701
702 APInt::tcShiftRight(dst, Words: parts, Count: bits);
703
704 return lost_fraction;
705}
706
707/* Combine the effect of two lost fractions. */
708static lostFraction
709combineLostFractions(lostFraction moreSignificant,
710 lostFraction lessSignificant)
711{
712 if (lessSignificant != lfExactlyZero) {
713 if (moreSignificant == lfExactlyZero)
714 moreSignificant = lfLessThanHalf;
715 else if (moreSignificant == lfExactlyHalf)
716 moreSignificant = lfMoreThanHalf;
717 }
718
719 return moreSignificant;
720}
721
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUE1 and HUE2 half-ulps, is strictly less
   than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  /* Exact inputs: the only error is from the multiply itself, i.e.
     <= inexactMultiply half-ulps.  */
  if (inputError == 0)
    return multiplyError * 2;

  return multiplyError + 2 * inputError;
}
739
/* The number of ulps from the boundary (zero, or half if ISNEAREST)
   when the least significant BITS are truncated.  BITS cannot be
   zero.

   The distance is only computed exactly when it fits in a single part;
   otherwise the all-ones value is returned to mean "a lot".  */
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  /* COUNT is the index of the part holding the highest truncated bit and
     PARTBITS the number of truncated bits that lie within that part.  */
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  /* The truncated bits that live in the top part.  */
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  /* Half is the top truncated bit on its own.  */
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  if (count == 0) {
    /* Everything truncated is in one part.  One of the two unsigned
       differences wraps around to a huge value, so the smaller of the two
       is the absolute distance.  */
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  if (part == boundary) {
    /* At or just above the boundary: the distance is the value of the
       lower parts, which is small only if parts [1, count) are all zero.  */
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    /* Just below the boundary: the distance is small only if parts
       [1, count) are all ones, in which case it is the unsigned negation
       of parts[0].  */
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}
785
/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.

   The low three bits of POWER select a small power from a table; the
   remaining bits are handled by binary exponentiation using repeated
   squarings of pow(5, 8).  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* Storage for the successive squarings, laid out back to back: entry
     n + 1 starts directly after entry n.  Entry 0 is pow(5, 8).  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  /* Number of parts in the squaring that POW5 currently refers to.  */
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  /* P1 always holds the running product (RESULT parts long); P2 is the
     buffer the next product is written into.  They swap after each
     multiplication.  */
  p1 = dst;
  p2 = scratch;

  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  It is the square of
       the previous entry, which ends immediately before POW5.  */
    if (n != 0) {
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      /* Drop the top part if the product did not need it.  */
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* Now result is in p1 with RESULT parts and p2 is scratch
         space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    /* Step past the current entry so the next squaring lands after it.  */
    pow5 += partsCount;
  }

  /* An odd number of swaps leaves the product in SCRATCH.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}
841
/* Digit tables for hexadecimal output, indexed by nibble value.  Zero at
   the end to avoid modular arithmetic when adding one; used when rounding
   up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of the special values.  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
850
851/* Write out an integerPart in hexadecimal, starting with the most
852 significant nibble. Write out exactly COUNT hexdigits, return
853 COUNT. */
854static unsigned int
855partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
856 const char *hexDigitChars)
857{
858 unsigned int result = count;
859
860 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
861
862 part >>= (APFloatBase::integerPartWidth - 4 * count);
863 while (count--) {
864 dst[count] = hexDigitChars[part & 0xf];
865 part >>= 4;
866 }
867
868 return result;
869}
870
/* Write out an unsigned decimal integer.  Writes the digits of N at DST
   without a terminating NUL and returns the position one past the last
   digit written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int length = 0;

  /* Produce the digits least significant first; zero still yields one
     digit.  */
  do {
    digits[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Emit them in reverse, most significant first.  */
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
888
889/* Write out a signed decimal integer. */
890static char *
891writeSignedDecimal (char *dst, int value)
892{
893 if (value < 0) {
894 *dst++ = '-';
895 dst = writeUnsignedDecimal(dst, n: -(unsigned) value);
896 } else {
897 dst = writeUnsignedDecimal(dst, n: value);
898 }
899
900 return dst;
901}
902
903namespace detail {
904/* Constructors. */
905void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
906 unsigned int count;
907
908 semantics = ourSemantics;
909 count = partCount();
910 if (count > 1)
911 significand.parts = new integerPart[count];
912}
913
914void IEEEFloat::freeSignificand() {
915 if (needsCleanup())
916 delete [] significand.parts;
917}
918
919void IEEEFloat::assign(const IEEEFloat &rhs) {
920 assert(semantics == rhs.semantics);
921
922 sign = rhs.sign;
923 category = rhs.category;
924 exponent = rhs.exponent;
925 if (isFiniteNonZero() || category == fcNaN)
926 copySignificand(rhs);
927}
928
929void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
930 assert(isFiniteNonZero() || category == fcNaN);
931 assert(rhs.partCount() >= partCount());
932
933 APInt::tcAssign(significandParts(), rhs.significandParts(),
934 partCount());
935}
936
/* Make this number a NaN with sign NEGATIVE.  SNAN requests a signalling
   NaN, otherwise a quiet one is produced.  FILL, when non-null, supplies
   the payload bits; without it the payload starts out as zero.  For
   formats whose nonFiniteBehavior is NanOnly both SNAN and FILL are
   overridden below with a fixed encoding.  Must not be called for
   FiniteOnly formats, nor with NEGATIVE set for formats that have no
   signed representation.  */
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support NaN");

  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");

  category = fcNaN;
  sign = Negative;
  exponent = exponentNaN();

  integerPart *significand = significandParts();
  unsigned numParts = partCount();

  APInt fill_storage;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // NanOnly types do not distinguish signalling and quiet NaN, so the
    // SNaN request is dropped and the caller's payload is replaced by the
    // fixed one that nanEncoding dictates.
    SNaN = false;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
      // The NaN is encoded as negative zero: sign set, significand clear.
      sign = true;
      fill_storage = APInt::getZero(numBits: semantics->precision - 1);
    } else {
      fill_storage = APInt::getAllOnes(numBits: semantics->precision - 1);
    }
    fill = &fill_storage;
  }

  // Set the significand bits to the fill.  Start from zero when there is no
  // fill, or when the fill has fewer words than the significand has parts.
  if (!fill || fill->getNumWords() < numParts)
    APInt::tcSet(significand, 0, numParts);
  if (fill) {
    APInt::tcAssign(significand, fill->getRawData(),
                    std::min(a: fill->getNumWords(), b: numParts));

    // Zero out the excess bits of the significand: only the low
    // (precision - 1) bits of the fill are kept.
    unsigned bitsToPreserve = semantics->precision - 1;
    unsigned part = bitsToPreserve / 64;
    bitsToPreserve %= 64;
    significand[part] &= ((1ULL << bitsToPreserve) - 1);
    for (part++; part != numParts; ++part)
      significand[part] = 0;
  }

  // Index of the quiet bit: the bit just below the top (precision - 1) bit,
  // clamped to 0 for very narrow formats.
  unsigned QNaNBit =
      (semantics->precision >= 2) ? (semantics->precision - 2) : 0;

  if (SNaN) {
    // We always have to clear the QNaN bit to make it an SNaN.
    APInt::tcClearBit(significand, bit: QNaNBit);

    // If there are no bits set in the payload, we have to set
    // *something* to make it a NaN instead of an infinity;
    // conventionally, this is the next bit down from the QNaN bit.
    if (APInt::tcIsZero(significand, numParts))
      APInt::tcSetBit(significand, bit: QNaNBit - 1);
  } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // The only NaN is a quiet NaN, and it has no bits set in the significand.
    // Do nothing.
  } else {
    // We always have to set the QNaN bit to make it a QNaN.
    APInt::tcSetBit(significand, bit: QNaNBit);
  }

  // For x87 extended precision, we want to make a NaN, not a
  // pseudo-NaN.  Maybe we should expose the ability to make
  // pseudo-NaNs?
  if (semantics == &semX87DoubleExtended)
    APInt::tcSetBit(significand, bit: QNaNBit + 1);
}
1011
1012IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
1013 if (this != &rhs) {
1014 if (semantics != rhs.semantics) {
1015 freeSignificand();
1016 initialize(ourSemantics: rhs.semantics);
1017 }
1018 assign(rhs);
1019 }
1020
1021 return *this;
1022}
1023
1024IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
1025 freeSignificand();
1026
1027 semantics = rhs.semantics;
1028 significand = rhs.significand;
1029 exponent = rhs.exponent;
1030 category = rhs.category;
1031 sign = rhs.sign;
1032
1033 rhs.semantics = &semBogus;
1034 return *this;
1035}
1036
1037bool IEEEFloat::isDenormal() const {
1038 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1039 (APInt::tcExtractBit(significandParts(),
1040 bit: semantics->precision - 1) == 0);
1041}
1042
1043bool IEEEFloat::isSmallest() const {
1044 // The smallest number by magnitude in our format will be the smallest
1045 // denormal, i.e. the floating point number with exponent being minimum
1046 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1047 return isFiniteNonZero() && exponent == semantics->minExponent &&
1048 significandMSB() == 0;
1049}
1050
1051bool IEEEFloat::isSmallestNormalized() const {
1052 return getCategory() == fcNormal && exponent == semantics->minExponent &&
1053 isSignificandAllZerosExceptMSB();
1054}
1055
1056unsigned int IEEEFloat::getNumHighBits() const {
1057 const unsigned int PartCount = partCountForBits(bits: semantics->precision);
1058 const unsigned int Bits = PartCount * integerPartWidth;
1059
1060 // Compute how many bits are used in the final word.
1061 // When precision is just 1, it represents the 'Pth'
1062 // Precision bit and not the actual significand bit.
1063 const unsigned int NumHighBits = (semantics->precision > 1)
1064 ? (Bits - semantics->precision + 1)
1065 : (Bits - semantics->precision);
1066 return NumHighBits;
1067}
1068
1069bool IEEEFloat::isSignificandAllOnes() const {
1070 // Test if the significand excluding the integral bit is all ones. This allows
1071 // us to test for binade boundaries.
1072 const integerPart *Parts = significandParts();
1073 const unsigned PartCount = partCountForBits(bits: semantics->precision);
1074 for (unsigned i = 0; i < PartCount - 1; i++)
1075 if (~Parts[i])
1076 return false;
1077
1078 // Set the unused high bits to all ones when we compare.
1079 const unsigned NumHighBits = getNumHighBits();
1080 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1081 "Can not have more high bits to fill than integerPartWidth");
1082 const integerPart HighBitFill =
1083 ~integerPart(0) << (integerPartWidth - NumHighBits);
1084 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
1085 return false;
1086
1087 return true;
1088}
1089
1090bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1091 // Test if the significand excluding the integral bit is all ones except for
1092 // the least significant bit.
1093 const integerPart *Parts = significandParts();
1094
1095 if (Parts[0] & 1)
1096 return false;
1097
1098 const unsigned PartCount = partCountForBits(bits: semantics->precision);
1099 for (unsigned i = 0; i < PartCount - 1; i++) {
1100 if (~Parts[i] & ~unsigned{!i})
1101 return false;
1102 }
1103
1104 // Set the unused high bits to all ones when we compare.
1105 const unsigned NumHighBits = getNumHighBits();
1106 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1107 "Can not have more high bits to fill than integerPartWidth");
1108 const integerPart HighBitFill = ~integerPart(0)
1109 << (integerPartWidth - NumHighBits);
1110 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1111 return false;
1112
1113 return true;
1114}
1115
1116bool IEEEFloat::isSignificandAllZeros() const {
1117 // Test if the significand excluding the integral bit is all zeros. This
1118 // allows us to test for binade boundaries.
1119 const integerPart *Parts = significandParts();
1120 const unsigned PartCount = partCountForBits(bits: semantics->precision);
1121
1122 for (unsigned i = 0; i < PartCount - 1; i++)
1123 if (Parts[i])
1124 return false;
1125
1126 // Compute how many bits are used in the final word.
1127 const unsigned NumHighBits = getNumHighBits();
1128 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1129 "clear than integerPartWidth");
1130 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1131
1132 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1133 return false;
1134
1135 return true;
1136}
1137
1138bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1139 const integerPart *Parts = significandParts();
1140 const unsigned PartCount = partCountForBits(bits: semantics->precision);
1141
1142 for (unsigned i = 0; i < PartCount - 1; i++) {
1143 if (Parts[i])
1144 return false;
1145 }
1146
1147 const unsigned NumHighBits = getNumHighBits();
1148 const integerPart MSBMask = integerPart(1)
1149 << (integerPartWidth - NumHighBits);
1150 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1151}
1152
1153bool IEEEFloat::isLargest() const {
1154 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1155 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1156 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1157 // The largest number by magnitude in our format will be the floating point
1158 // number with maximum exponent and with significand that is all ones except
1159 // the LSB.
1160 return (IsMaxExp && APFloat::hasSignificand(Sem: *semantics))
1161 ? isSignificandAllOnesExceptLSB()
1162 : IsMaxExp;
1163 } else {
1164 // The largest number by magnitude in our format will be the floating point
1165 // number with maximum exponent and with significand that is all ones.
1166 return IsMaxExp && isSignificandAllOnes();
1167 }
1168}
1169
1170bool IEEEFloat::isInteger() const {
1171 // This could be made more efficient; I'm going for obviously correct.
1172 if (!isFinite()) return false;
1173 IEEEFloat truncated = *this;
1174 truncated.roundToIntegral(rmTowardZero);
1175 return compare(truncated) == cmpEqual;
1176}
1177
1178bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1179 if (this == &rhs)
1180 return true;
1181 if (semantics != rhs.semantics ||
1182 category != rhs.category ||
1183 sign != rhs.sign)
1184 return false;
1185 if (category==fcZero || category==fcInfinity)
1186 return true;
1187
1188 if (isFiniteNonZero() && exponent != rhs.exponent)
1189 return false;
1190
1191 return std::equal(first1: significandParts(), last1: significandParts() + partCount(),
1192 first2: rhs.significandParts());
1193}
1194
1195IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1196 initialize(ourSemantics: &ourSemantics);
1197 sign = 0;
1198 category = fcNormal;
1199 zeroSignificand();
1200 exponent = ourSemantics.precision - 1;
1201 significandParts()[0] = value;
1202 normalize(rmNearestTiesToEven, lfExactlyZero);
1203}
1204
1205IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1206 initialize(ourSemantics: &ourSemantics);
1207 // The Float8E8MOFNU format does not have a representation
1208 // for zero. So, use the closest representation instead.
1209 // Moreover, the all-zero encoding represents a valid
1210 // normal value (which is the smallestNormalized here).
1211 // Hence, we call makeSmallestNormalized (where category is
1212 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1213 ourSemantics.hasZero ? makeZero(Neg: false) : makeSmallestNormalized(Negative: false);
1214}
1215
1216// Delegate to the previous constructor, because later copy constructor may
1217// actually inspects category, which can't be garbage.
1218IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1219 : IEEEFloat(ourSemantics) {}
1220
1221IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1222 initialize(ourSemantics: rhs.semantics);
1223 assign(rhs);
1224}
1225
1226IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1227 *this = std::move(rhs);
1228}
1229
1230IEEEFloat::~IEEEFloat() { freeSignificand(); }
1231
1232unsigned int IEEEFloat::partCount() const {
1233 return partCountForBits(bits: semantics->precision + 1);
1234}
1235
1236const APFloat::integerPart *IEEEFloat::significandParts() const {
1237 return const_cast<IEEEFloat *>(this)->significandParts();
1238}
1239
1240APFloat::integerPart *IEEEFloat::significandParts() {
1241 if (partCount() > 1)
1242 return significand.parts;
1243 else
1244 return &significand.part;
1245}
1246
1247void IEEEFloat::zeroSignificand() {
1248 APInt::tcSet(significandParts(), 0, partCount());
1249}
1250
1251/* Increment an fcNormal floating point number's significand. */
1252void IEEEFloat::incrementSignificand() {
1253 integerPart carry;
1254
1255 carry = APInt::tcIncrement(dst: significandParts(), parts: partCount());
1256
1257 /* Our callers should never cause us to overflow. */
1258 assert(carry == 0);
1259 (void)carry;
1260}
1261
1262/* Add the significand of the RHS. Returns the carry flag. */
1263APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1264 integerPart *parts;
1265
1266 parts = significandParts();
1267
1268 assert(semantics == rhs.semantics);
1269 assert(exponent == rhs.exponent);
1270
1271 return APInt::tcAdd(parts, rhs.significandParts(), carry: 0, partCount());
1272}
1273
1274/* Subtract the significand of the RHS with a borrow flag. Returns
1275 the borrow flag. */
1276APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1277 integerPart borrow) {
1278 integerPart *parts;
1279
1280 parts = significandParts();
1281
1282 assert(semantics == rhs.semantics);
1283 assert(exponent == rhs.exponent);
1284
1285 return APInt::tcSubtract(parts, rhs.significandParts(), carry: borrow,
1286 partCount());
1287}
1288
/* Multiply our significand by the significand of RHS and adjust the
   exponent accordingly.  Unless IGNOREADDEND is set, a non-zero ADDEND is
   added on to the full-precision result of the multiplication before it
   is narrowed back to the working precision.  Returns the lost fraction.
   The result is not necessarily normalized; see the note near the end.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend,
                                            bool ignoreAddend) {
  unsigned int omsb; // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(bits: precision * 2 + 1);

  // Up to four parts fit in the on-stack scratch buffer; anything larger is
  // heap-allocated here and released just before returning.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(parts: fullSignificand, n: newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (!ignoreAddend && addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with the mul
    // result.  To do so *this temporarily takes on the wide product as its
    // significand together with widened semantics; both are restored below.
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, Words: newPartsCount,
                         Count: (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // A one-part significand is stored inline, so copy the value in;
    // otherwise point at the product buffer.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
                                    &ignored);
    assert(status == APFloat::opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, subtract: false);

    /* Restore our state.  In the one-part case the sum lives in the inline
       part, so copy it back into the product buffer first.  */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(parts: fullSignificand, n: newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1, one more than
  // this derivation; presumably the extra 1 pairs with the "exponent += 2"
  // above -- confirm.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(bits: omsb);
    lf = shiftRight(dst: fullSignificand, parts: significantParts, bits);
    lost_fraction = combineLostFractions(moreSignificant: lf, lessSignificant: lost_fraction);
    exponent += bits;
  }

  // Copy the low partsCount parts of the product back into our significand.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}
1425
1426lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1427 // When the given semantics has zero, the addend here is a zero.
1428 // i.e . it belongs to the 'fcZero' category.
1429 // But when the semantics does not support zero, we need to
1430 // explicitly convey that this addend should be ignored
1431 // for multiplication.
1432 return multiplySignificand(rhs, addend: IEEEFloat(*semantics), ignoreAddend: !semantics->hasZero);
1433}
1434
/* Divide our significand by the significand of RHS using bitwise long
   division.  The quotient replaces our significand and the exponent is
   adjusted to match.  Returns the lost fraction, derived from the final
   remainder.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  /* One buffer holds both working copies: the dividend first, the divisor
     right after it.  Up to two parts each fit in the scratch array.  */
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.  Our
     own significand is cleared so the quotient bits can be set into it.  */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor: shift its MSB up to bit precision - 1.  */
  bit = precision - APInt::tcMSB(parts: divisor, n: partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, Words: partsCount, Count: bit);
  }

  /* Normalize the dividend in the same way.  */
  bit = precision - APInt::tcMSB(parts: dividend, n: partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, Words: partsCount, Count: bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, Words: partsCount, Count: 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division: produce one quotient bit per iteration, from bit
     precision - 1 down to bit 0.  */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, carry: 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit: bit - 1);
    }

    APInt::tcShiftLeft(dividend, Words: partsCount, Count: 1);
  }

  /* Figure out the lost fraction by comparing the remainder (already
     shifted left once more by the loop) with the divisor.  */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}
1517
1518unsigned int IEEEFloat::significandMSB() const {
1519 return APInt::tcMSB(parts: significandParts(), n: partCount());
1520}
1521
1522unsigned int IEEEFloat::significandLSB() const {
1523 return APInt::tcLSB(significandParts(), n: partCount());
1524}
1525
1526/* Note that a zero result is NOT normalized to fcZero. */
1527lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1528 /* Our exponent should not overflow. */
1529 assert((ExponentType) (exponent + bits) >= exponent);
1530
1531 exponent += bits;
1532
1533 return shiftRight(dst: significandParts(), parts: partCount(), bits);
1534}
1535
1536/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1537void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1538 assert(bits < semantics->precision ||
1539 (semantics->precision == 1 && bits <= 1));
1540
1541 if (bits) {
1542 unsigned int partsCount = partCount();
1543
1544 APInt::tcShiftLeft(significandParts(), Words: partsCount, Count: bits);
1545 exponent -= bits;
1546
1547 assert(!APInt::tcIsZero(significandParts(), partsCount));
1548 }
1549}
1550
1551APFloat::cmpResult IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1552 int compare;
1553
1554 assert(semantics == rhs.semantics);
1555 assert(isFiniteNonZero());
1556 assert(rhs.isFiniteNonZero());
1557
1558 compare = exponent - rhs.exponent;
1559
1560 /* If exponents are equal, do an unsigned bignum comparison of the
1561 significands. */
1562 if (compare == 0)
1563 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1564 partCount());
1565
1566 if (compare > 0)
1567 return cmpGreaterThan;
1568 else if (compare < 0)
1569 return cmpLessThan;
1570 else
1571 return cmpEqual;
1572}
1573
1574/* Set the least significant BITS bits of a bignum, clear the
1575 rest. */
1576static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1577 unsigned bits) {
1578 unsigned i = 0;
1579 while (bits > APInt::APINT_BITS_PER_WORD) {
1580 dst[i++] = ~(APInt::WordType)0;
1581 bits -= APInt::APINT_BITS_PER_WORD;
1582 }
1583
1584 if (bits)
1585 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1586
1587 while (i < parts)
1588 dst[i++] = 0;
1589}
1590
1591/* Handle overflow. Sign is preserved. We either become infinity or
1592 the largest finite number. */
1593APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1594 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1595 /* Infinity? */
1596 if (rounding_mode == rmNearestTiesToEven ||
1597 rounding_mode == rmNearestTiesToAway ||
1598 (rounding_mode == rmTowardPositive && !sign) ||
1599 (rounding_mode == rmTowardNegative && sign)) {
1600 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1601 makeNaN(SNaN: false, Negative: sign);
1602 else
1603 category = fcInfinity;
1604 return static_cast<opStatus>(opOverflow | opInexact);
1605 }
1606 }
1607
1608 /* Otherwise we become the largest finite number. */
1609 category = fcNormal;
1610 exponent = semantics->maxExponent;
1611 tcSetLeastSignificantBits(dst: significandParts(), parts: partCount(),
1612 bits: semantics->precision);
1613 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1614 semantics->nanEncoding == fltNanEncoding::AllOnes)
1615 APInt::tcClearBit(significandParts(), bit: 0);
1616
1617 return opInexact;
1618}
1619
1620/* Returns TRUE if, when truncating the current number, with BIT the
1621 new LSB, with the given lost fraction and rounding mode, the result
1622 would need to be rounded away from zero (i.e., by increasing the
1623 signficand). This routine must work for fcZero of both signs, and
1624 fcNormal numbers. */
1625bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1626 lostFraction lost_fraction,
1627 unsigned int bit) const {
1628 /* NaNs and infinities should not have lost fractions. */
1629 assert(isFiniteNonZero() || category == fcZero);
1630
1631 /* Current callers never pass this so we don't handle it. */
1632 assert(lost_fraction != lfExactlyZero);
1633
1634 switch (rounding_mode) {
1635 case rmNearestTiesToAway:
1636 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1637
1638 case rmNearestTiesToEven:
1639 if (lost_fraction == lfMoreThanHalf)
1640 return true;
1641
1642 /* Our zeroes don't have a significand to test. */
1643 if (lost_fraction == lfExactlyHalf && category != fcZero)
1644 return APInt::tcExtractBit(significandParts(), bit);
1645
1646 return false;
1647
1648 case rmTowardZero:
1649 return false;
1650
1651 case rmTowardPositive:
1652 return !sign;
1653
1654 case rmTowardNegative:
1655 return sign;
1656
1657 default:
1658 break;
1659 }
1660 llvm_unreachable("Invalid rounding mode found");
1661}
1662
/* Normalize and round this number.  LOST_FRACTION describes what has
   already been discarded below the least significant bit of the
   significand; ROUNDING_MODE selects how it, together with any further
   bits shifted out here, is rounded.  Values that are not finite non-zero
   are returned untouched with opOK.  Otherwise the status is opOK for an
   exact result, opInexact for a rounded one, whatever handleOverflow()
   reports when the value is too large, or opUnderflow | opInexact when an
   inexact result ends up denormal or zero.  */
APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                       lostFraction lost_fraction) {
  unsigned int omsb; /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  // Only skip this `if` if the value is exactly zero.
  if (omsb || lost_fraction != lfExactlyZero) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(bits: -exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(bits: exponentChange);

      lost_fraction = combineLostFractions(moreSignificant: lf, lessSignificant: lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones value is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  */
    if (omsb == 0) {
      category = fcZero;
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      // Formats without a zero fall back to the smallest normalized value.
      if (!semantics->hasZero)
        makeSmallestNormalized(Negative: false);
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, bit: 0)) {
    /* Everything was shifted out: the increment below creates the lowest
       bit, which belongs at the minimum exponent.  */
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(rounding_mode: sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(bits: 1);

      return opInexact;
    }

    // The all-ones value is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
    // This condition handles the case where the semantics
    // does not have zero but uses the all-zero encoding
    // to represent the smallest normal value.
    if (!semantics->hasZero)
      makeSmallestNormalized(Negative: false);
  }

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}
1797
/* Handle the operand-category combinations of adding RHS to *this (or
   subtracting it, when SUBTRACT is true) in which at least one operand is
   a zero, an infinity or a NaN.  *this is updated in place and the status
   returned.  For two fcNormal operands nothing is changed and opDivByZero
   is returned.
   NOTE(review): that value is presumably a sentinel telling the caller to
   perform the real arithmetic -- confirm against the caller.  */
APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                   bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  /* Only RHS is a NaN: the result takes over RHS, then shares the NaN
     handling below.  */
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  /* *this is (now) a NaN: a signalling NaN in either operand is an invalid
     operation, and a signalling result is quieted.  */
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  /* RHS does not change the value of *this.  */
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  /* An infinite RHS dominates; its sign is flipped for a subtraction.  */
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  /* Zero plus or minus a normal number: the result is RHS, negated for a
     subtraction.  */
  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted.  */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  /* Two normal numbers are not handled here; see the note in the header
     comment about this return value.  */
  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero;
  }
}
1853
1854/* Add or subtract two normal numbers. */
1855lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1856 bool subtract) {
1857 integerPart carry = 0;
1858 lostFraction lost_fraction;
1859 int bits;
1860
1861 /* Determine if the operation on the absolute values is effectively
1862 an addition or subtraction. */
1863 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1864
1865 /* Are we bigger exponent-wise than the RHS? */
1866 bits = exponent - rhs.exponent;
1867
1868 /* Subtraction is more subtle than one might naively expect. */
1869 if (subtract) {
1870 if ((bits < 0) && !semantics->hasSignedRepr)
1871 llvm_unreachable(
1872 "This floating point format does not support signed values");
1873
1874 IEEEFloat temp_rhs(rhs);
1875 bool lost_fraction_is_from_rhs = false;
1876
1877 if (bits == 0)
1878 lost_fraction = lfExactlyZero;
1879 else if (bits > 0) {
1880 lost_fraction = temp_rhs.shiftSignificandRight(bits: bits - 1);
1881 lost_fraction_is_from_rhs = true;
1882 shiftSignificandLeft(bits: 1);
1883 } else {
1884 lost_fraction = shiftSignificandRight(bits: -bits - 1);
1885 temp_rhs.shiftSignificandLeft(bits: 1);
1886 }
1887
1888 // Should we reverse the subtraction.
1889 cmpResult cmp_result = compareAbsoluteValue(rhs: temp_rhs);
1890 if (cmp_result == cmpLessThan) {
1891 bool borrow =
1892 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1893 if (borrow) {
1894 // The lost fraction is being subtracted, borrow from the significand
1895 // and invert `lost_fraction`.
1896 if (lost_fraction == lfLessThanHalf)
1897 lost_fraction = lfMoreThanHalf;
1898 else if (lost_fraction == lfMoreThanHalf)
1899 lost_fraction = lfLessThanHalf;
1900 }
1901 carry = temp_rhs.subtractSignificand(rhs: *this, borrow);
1902 copySignificand(rhs: temp_rhs);
1903 sign = !sign;
1904 } else if (cmp_result == cmpGreaterThan) {
1905 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1906 if (borrow) {
1907 // The lost fraction is being subtracted, borrow from the significand
1908 // and invert `lost_fraction`.
1909 if (lost_fraction == lfLessThanHalf)
1910 lost_fraction = lfMoreThanHalf;
1911 else if (lost_fraction == lfMoreThanHalf)
1912 lost_fraction = lfLessThanHalf;
1913 }
1914 carry = subtractSignificand(rhs: temp_rhs, borrow);
1915 } else { // cmpEqual
1916 zeroSignificand();
1917 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1918 // rhs is slightly larger due to the lost fraction, flip the sign.
1919 sign = !sign;
1920 }
1921 }
1922
1923 /* The code above is intended to ensure that no borrow is
1924 necessary. */
1925 assert(!carry);
1926 (void)carry;
1927 } else {
1928 if (bits > 0) {
1929 IEEEFloat temp_rhs(rhs);
1930
1931 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1932 carry = addSignificand(rhs: temp_rhs);
1933 } else {
1934 lost_fraction = shiftSignificandRight(bits: -bits);
1935 carry = addSignificand(rhs);
1936 }
1937
1938 /* We have a guard bit; generating a carry cannot happen. */
1939 assert(!carry);
1940 (void)carry;
1941 }
1942
1943 return lost_fraction;
1944}
1945
1946APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1947 switch (PackCategoriesIntoKey(category, rhs.category)) {
1948 default:
1949 llvm_unreachable(nullptr);
1950
1951 case PackCategoriesIntoKey(fcZero, fcNaN):
1952 case PackCategoriesIntoKey(fcNormal, fcNaN):
1953 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1954 assign(rhs);
1955 sign = false;
1956 [[fallthrough]];
1957 case PackCategoriesIntoKey(fcNaN, fcZero):
1958 case PackCategoriesIntoKey(fcNaN, fcNormal):
1959 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1960 case PackCategoriesIntoKey(fcNaN, fcNaN):
1961 sign ^= rhs.sign; // restore the original sign
1962 if (isSignaling()) {
1963 makeQuiet();
1964 return opInvalidOp;
1965 }
1966 return rhs.isSignaling() ? opInvalidOp : opOK;
1967
1968 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1969 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1970 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1971 category = fcInfinity;
1972 return opOK;
1973
1974 case PackCategoriesIntoKey(fcZero, fcNormal):
1975 case PackCategoriesIntoKey(fcNormal, fcZero):
1976 case PackCategoriesIntoKey(fcZero, fcZero):
1977 category = fcZero;
1978 return opOK;
1979
1980 case PackCategoriesIntoKey(fcZero, fcInfinity):
1981 case PackCategoriesIntoKey(fcInfinity, fcZero):
1982 makeNaN();
1983 return opInvalidOp;
1984
1985 case PackCategoriesIntoKey(fcNormal, fcNormal):
1986 return opOK;
1987 }
1988}
1989
1990APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1991 switch (PackCategoriesIntoKey(category, rhs.category)) {
1992 default:
1993 llvm_unreachable(nullptr);
1994
1995 case PackCategoriesIntoKey(fcZero, fcNaN):
1996 case PackCategoriesIntoKey(fcNormal, fcNaN):
1997 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1998 assign(rhs);
1999 sign = false;
2000 [[fallthrough]];
2001 case PackCategoriesIntoKey(fcNaN, fcZero):
2002 case PackCategoriesIntoKey(fcNaN, fcNormal):
2003 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2004 case PackCategoriesIntoKey(fcNaN, fcNaN):
2005 sign ^= rhs.sign; // restore the original sign
2006 if (isSignaling()) {
2007 makeQuiet();
2008 return opInvalidOp;
2009 }
2010 return rhs.isSignaling() ? opInvalidOp : opOK;
2011
2012 case PackCategoriesIntoKey(fcInfinity, fcZero):
2013 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2014 case PackCategoriesIntoKey(fcZero, fcInfinity):
2015 case PackCategoriesIntoKey(fcZero, fcNormal):
2016 return opOK;
2017
2018 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2019 category = fcZero;
2020 return opOK;
2021
2022 case PackCategoriesIntoKey(fcNormal, fcZero):
2023 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
2024 makeNaN(SNaN: false, Negative: sign);
2025 else
2026 category = fcInfinity;
2027 return opDivByZero;
2028
2029 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2030 case PackCategoriesIntoKey(fcZero, fcZero):
2031 makeNaN();
2032 return opInvalidOp;
2033
2034 case PackCategoriesIntoKey(fcNormal, fcNormal):
2035 return opOK;
2036 }
2037}
2038
2039APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
2040 switch (PackCategoriesIntoKey(category, rhs.category)) {
2041 default:
2042 llvm_unreachable(nullptr);
2043
2044 case PackCategoriesIntoKey(fcZero, fcNaN):
2045 case PackCategoriesIntoKey(fcNormal, fcNaN):
2046 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2047 assign(rhs);
2048 [[fallthrough]];
2049 case PackCategoriesIntoKey(fcNaN, fcZero):
2050 case PackCategoriesIntoKey(fcNaN, fcNormal):
2051 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2052 case PackCategoriesIntoKey(fcNaN, fcNaN):
2053 if (isSignaling()) {
2054 makeQuiet();
2055 return opInvalidOp;
2056 }
2057 return rhs.isSignaling() ? opInvalidOp : opOK;
2058
2059 case PackCategoriesIntoKey(fcZero, fcInfinity):
2060 case PackCategoriesIntoKey(fcZero, fcNormal):
2061 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2062 return opOK;
2063
2064 case PackCategoriesIntoKey(fcNormal, fcZero):
2065 case PackCategoriesIntoKey(fcInfinity, fcZero):
2066 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2067 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2068 case PackCategoriesIntoKey(fcZero, fcZero):
2069 makeNaN();
2070 return opInvalidOp;
2071
2072 case PackCategoriesIntoKey(fcNormal, fcNormal):
2073 return opOK;
2074 }
2075}
2076
2077APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
2078 switch (PackCategoriesIntoKey(category, rhs.category)) {
2079 default:
2080 llvm_unreachable(nullptr);
2081
2082 case PackCategoriesIntoKey(fcZero, fcNaN):
2083 case PackCategoriesIntoKey(fcNormal, fcNaN):
2084 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2085 assign(rhs);
2086 [[fallthrough]];
2087 case PackCategoriesIntoKey(fcNaN, fcZero):
2088 case PackCategoriesIntoKey(fcNaN, fcNormal):
2089 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2090 case PackCategoriesIntoKey(fcNaN, fcNaN):
2091 if (isSignaling()) {
2092 makeQuiet();
2093 return opInvalidOp;
2094 }
2095 return rhs.isSignaling() ? opInvalidOp : opOK;
2096
2097 case PackCategoriesIntoKey(fcZero, fcInfinity):
2098 case PackCategoriesIntoKey(fcZero, fcNormal):
2099 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2100 return opOK;
2101
2102 case PackCategoriesIntoKey(fcNormal, fcZero):
2103 case PackCategoriesIntoKey(fcInfinity, fcZero):
2104 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2105 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2106 case PackCategoriesIntoKey(fcZero, fcZero):
2107 makeNaN();
2108 return opInvalidOp;
2109
2110 case PackCategoriesIntoKey(fcNormal, fcNormal):
2111 return opDivByZero; // fake status, indicating this is not a special case
2112 }
2113}
2114
2115/* Change sign. */
2116void IEEEFloat::changeSign() {
2117 // With NaN-as-negative-zero, neither NaN or negative zero can change
2118 // their signs.
2119 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2120 (isZero() || isNaN()))
2121 return;
2122 /* Look mummy, this one's easy. */
2123 sign = !sign;
2124}
2125
2126/* Normalized addition or subtraction. */
2127APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2128 roundingMode rounding_mode,
2129 bool subtract) {
2130 opStatus fs;
2131
2132 fs = addOrSubtractSpecials(rhs, subtract);
2133
2134 /* This return code means it was not a simple case. */
2135 if (fs == opDivByZero) {
2136 lostFraction lost_fraction;
2137
2138 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2139 fs = normalize(rounding_mode, lost_fraction);
2140
2141 /* Can only be zero if we lost no fraction. */
2142 assert(category != fcZero || lost_fraction == lfExactlyZero);
2143 }
2144
2145 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2146 positive zero unless rounding to minus infinity, except that
2147 adding two like-signed zeroes gives that zero. */
2148 if (category == fcZero) {
2149 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2150 sign = (rounding_mode == rmTowardNegative);
2151 // NaN-in-negative-zero means zeros need to be normalized to +0.
2152 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2153 sign = false;
2154 }
2155
2156 return fs;
2157}
2158
2159/* Normalized addition. */
2160APFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2161 roundingMode rounding_mode) {
2162 return addOrSubtract(rhs, rounding_mode, subtract: false);
2163}
2164
2165/* Normalized subtraction. */
2166APFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2167 roundingMode rounding_mode) {
2168 return addOrSubtract(rhs, rounding_mode, subtract: true);
2169}
2170
2171/* Normalized multiply. */
2172APFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2173 roundingMode rounding_mode) {
2174 opStatus fs;
2175
2176 sign ^= rhs.sign;
2177 fs = multiplySpecials(rhs);
2178
2179 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2180 sign = false;
2181 if (isFiniteNonZero()) {
2182 lostFraction lost_fraction = multiplySignificand(rhs);
2183 fs = normalize(rounding_mode, lost_fraction);
2184 if (lost_fraction != lfExactlyZero)
2185 fs = (opStatus) (fs | opInexact);
2186 }
2187
2188 return fs;
2189}
2190
2191/* Normalized divide. */
2192APFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2193 roundingMode rounding_mode) {
2194 opStatus fs;
2195
2196 sign ^= rhs.sign;
2197 fs = divideSpecials(rhs);
2198
2199 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2200 sign = false;
2201 if (isFiniteNonZero()) {
2202 lostFraction lost_fraction = divideSignificand(rhs);
2203 fs = normalize(rounding_mode, lost_fraction);
2204 if (lost_fraction != lfExactlyZero)
2205 fs = (opStatus) (fs | opInexact);
2206 }
2207
2208 return fs;
2209}
2210
2211/* Normalized remainder. */
2212APFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2213 opStatus fs;
2214 unsigned int origSign = sign;
2215
2216 // First handle the special cases.
2217 fs = remainderSpecials(rhs);
2218 if (fs != opDivByZero)
2219 return fs;
2220
2221 fs = opOK;
2222
2223 // Make sure the current value is less than twice the denom. If the addition
2224 // did not succeed (an overflow has happened), which means that the finite
2225 // value we currently posses must be less than twice the denom (as we are
2226 // using the same semantics).
2227 IEEEFloat P2 = rhs;
2228 if (P2.add(rhs, rounding_mode: rmNearestTiesToEven) == opOK) {
2229 fs = mod(P2);
2230 assert(fs == opOK);
2231 }
2232
2233 // Lets work with absolute numbers.
2234 IEEEFloat P = rhs;
2235 P.sign = false;
2236 sign = false;
2237
2238 //
2239 // To calculate the remainder we use the following scheme.
2240 //
2241 // The remainder is defained as follows:
2242 //
2243 // remainder = numer - rquot * denom = x - r * p
2244 //
2245 // Where r is the result of: x/p, rounded toward the nearest integral value
2246 // (with halfway cases rounded toward the even number).
2247 //
2248 // Currently, (after x mod 2p):
2249 // r is the number of 2p's present inside x, which is inherently, an even
2250 // number of p's.
2251 //
2252 // We may split the remaining calculation into 4 options:
2253 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2254 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2255 // are done as well.
2256 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2257 // to subtract 1p at least once.
2258 // - if x >= p then we must subtract p at least once, as x must be a
2259 // remainder.
2260 //
2261 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2262 //
2263 // We can now split the remaining calculation to the following 3 options:
2264 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2265 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2266 // must round up to the next even number. so we must subtract p once more.
2267 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2268 // integral, and subtract p once more.
2269 //
2270
2271 // Extend the semantics to prevent an overflow/underflow or inexact result.
2272 bool losesInfo;
2273 fltSemantics extendedSemantics = *semantics;
2274 extendedSemantics.maxExponent++;
2275 extendedSemantics.minExponent--;
2276 extendedSemantics.precision += 2;
2277
2278 IEEEFloat VEx = *this;
2279 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2280 assert(fs == opOK && !losesInfo);
2281 IEEEFloat PEx = P;
2282 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2283 assert(fs == opOK && !losesInfo);
2284
2285 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2286 // any fraction.
2287 fs = VEx.add(rhs: VEx, rounding_mode: rmNearestTiesToEven);
2288 assert(fs == opOK);
2289
2290 if (VEx.compare(PEx) == cmpGreaterThan) {
2291 fs = subtract(rhs: P, rounding_mode: rmNearestTiesToEven);
2292 assert(fs == opOK);
2293
2294 // Make VEx = this.add(this), but because we have different semantics, we do
2295 // not want to `convert` again, so we just subtract PEx twice (which equals
2296 // to the desired value).
2297 fs = VEx.subtract(rhs: PEx, rounding_mode: rmNearestTiesToEven);
2298 assert(fs == opOK);
2299 fs = VEx.subtract(rhs: PEx, rounding_mode: rmNearestTiesToEven);
2300 assert(fs == opOK);
2301
2302 cmpResult result = VEx.compare(PEx);
2303 if (result == cmpGreaterThan || result == cmpEqual) {
2304 fs = subtract(rhs: P, rounding_mode: rmNearestTiesToEven);
2305 assert(fs == opOK);
2306 }
2307 }
2308
2309 if (isZero()) {
2310 sign = origSign; // IEEE754 requires this
2311 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2312 // But some 8-bit floats only have positive 0.
2313 sign = false;
2314 }
2315
2316 else
2317 sign ^= origSign;
2318 return fs;
2319}
2320
2321/* Normalized llvm frem (C fmod). */
2322APFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2323 opStatus fs;
2324 fs = modSpecials(rhs);
2325 unsigned int origSign = sign;
2326
2327 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2328 compareAbsoluteValue(rhs) != cmpLessThan) {
2329 int Exp = ilogb(Arg: *this) - ilogb(Arg: rhs);
2330 IEEEFloat V = scalbn(X: rhs, Exp, rmNearestTiesToEven);
2331 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2332 // check for it.
2333 if (V.isNaN() || compareAbsoluteValue(rhs: V) == cmpLessThan)
2334 V = scalbn(X: rhs, Exp: Exp - 1, rmNearestTiesToEven);
2335 V.sign = sign;
2336
2337 fs = subtract(rhs: V, rounding_mode: rmNearestTiesToEven);
2338
2339 // When the semantics supports zero, this loop's
2340 // exit-condition is handled by the 'isFiniteNonZero'
2341 // category check above. However, when the semantics
2342 // does not have 'fcZero' and we have reached the
2343 // minimum possible value, (and any further subtract
2344 // will underflow to the same value) explicitly
2345 // provide an exit-path here.
2346 if (!semantics->hasZero && this->isSmallest())
2347 break;
2348
2349 assert(fs==opOK);
2350 }
2351 if (isZero()) {
2352 sign = origSign; // fmod requires this
2353 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2354 sign = false;
2355 }
2356 return fs;
2357}
2358
2359/* Normalized fused-multiply-add. */
2360APFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2361 const IEEEFloat &addend,
2362 roundingMode rounding_mode) {
2363 opStatus fs;
2364
2365 /* Post-multiplication sign, before addition. */
2366 sign ^= multiplicand.sign;
2367
2368 /* If and only if all arguments are normal do we need to do an
2369 extended-precision calculation. */
2370 if (isFiniteNonZero() &&
2371 multiplicand.isFiniteNonZero() &&
2372 addend.isFinite()) {
2373 lostFraction lost_fraction;
2374
2375 lost_fraction = multiplySignificand(rhs: multiplicand, addend);
2376 fs = normalize(rounding_mode, lost_fraction);
2377 if (lost_fraction != lfExactlyZero)
2378 fs = (opStatus) (fs | opInexact);
2379
2380 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2381 positive zero unless rounding to minus infinity, except that
2382 adding two like-signed zeroes gives that zero. */
2383 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2384 sign = (rounding_mode == rmTowardNegative);
2385 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2386 sign = false;
2387 }
2388 } else {
2389 fs = multiplySpecials(rhs: multiplicand);
2390
2391 /* FS can only be opOK or opInvalidOp. There is no more work
2392 to do in the latter case. The IEEE-754R standard says it is
2393 implementation-defined in this case whether, if ADDEND is a
2394 quiet NaN, we raise invalid op; this implementation does so.
2395
2396 If we need to do the addition we can do so with normal
2397 precision. */
2398 if (fs == opOK)
2399 fs = addOrSubtract(rhs: addend, rounding_mode, subtract: false);
2400 }
2401
2402 return fs;
2403}
2404
2405/* Rounding-mode correct round to integral value. */
2406APFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2407 opStatus fs;
2408
2409 if (isInfinity())
2410 // [IEEE Std 754-2008 6.1]:
2411 // The behavior of infinity in floating-point arithmetic is derived from the
2412 // limiting cases of real arithmetic with operands of arbitrarily
2413 // large magnitude, when such a limit exists.
2414 // ...
2415 // Operations on infinite operands are usually exact and therefore signal no
2416 // exceptions ...
2417 return opOK;
2418
2419 if (isNaN()) {
2420 if (isSignaling()) {
2421 // [IEEE Std 754-2008 6.2]:
2422 // Under default exception handling, any operation signaling an invalid
2423 // operation exception and for which a floating-point result is to be
2424 // delivered shall deliver a quiet NaN.
2425 makeQuiet();
2426 // [IEEE Std 754-2008 6.2]:
2427 // Signaling NaNs shall be reserved operands that, under default exception
2428 // handling, signal the invalid operation exception(see 7.2) for every
2429 // general-computational and signaling-computational operation except for
2430 // the conversions described in 5.12.
2431 return opInvalidOp;
2432 } else {
2433 // [IEEE Std 754-2008 6.2]:
2434 // For an operation with quiet NaN inputs, other than maximum and minimum
2435 // operations, if a floating-point result is to be delivered the result
2436 // shall be a quiet NaN which should be one of the input NaNs.
2437 // ...
2438 // Every general-computational and quiet-computational operation involving
2439 // one or more input NaNs, none of them signaling, shall signal no
2440 // exception, except fusedMultiplyAdd might signal the invalid operation
2441 // exception(see 7.2).
2442 return opOK;
2443 }
2444 }
2445
2446 if (isZero()) {
2447 // [IEEE Std 754-2008 6.3]:
2448 // ... the sign of the result of conversions, the quantize operation, the
2449 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2450 // the sign of the first or only operand.
2451 return opOK;
2452 }
2453
2454 // If the exponent is large enough, we know that this value is already
2455 // integral, and the arithmetic below would potentially cause it to saturate
2456 // to +/-Inf. Bail out early instead.
2457 if (exponent + 1 >= (int)APFloat::semanticsPrecision(semantics: *semantics))
2458 return opOK;
2459
2460 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2461 // precision of our format, and then subtract it back off again. The choice
2462 // of rounding modes for the addition/subtraction determines the rounding mode
2463 // for our integral rounding as well.
2464 // NOTE: When the input value is negative, we do subtraction followed by
2465 // addition instead.
2466 APInt IntegerConstant(NextPowerOf2(A: APFloat::semanticsPrecision(semantics: *semantics)),
2467 1);
2468 IntegerConstant <<= APFloat::semanticsPrecision(semantics: *semantics) - 1;
2469 IEEEFloat MagicConstant(*semantics);
2470 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2471 rmNearestTiesToEven);
2472 assert(fs == opOK);
2473 MagicConstant.sign = sign;
2474
2475 // Preserve the input sign so that we can handle the case of zero result
2476 // correctly.
2477 bool inputSign = isNegative();
2478
2479 fs = add(rhs: MagicConstant, rounding_mode);
2480
2481 // Current value and 'MagicConstant' are both integers, so the result of the
2482 // subtraction is always exact according to Sterbenz' lemma.
2483 subtract(rhs: MagicConstant, rounding_mode);
2484
2485 // Restore the input sign.
2486 if (inputSign != isNegative())
2487 changeSign();
2488
2489 return fs;
2490}
2491
2492/* Comparison requires normalized numbers. */
2493APFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2494 cmpResult result;
2495
2496 assert(semantics == rhs.semantics);
2497
2498 switch (PackCategoriesIntoKey(category, rhs.category)) {
2499 default:
2500 llvm_unreachable(nullptr);
2501
2502 case PackCategoriesIntoKey(fcNaN, fcZero):
2503 case PackCategoriesIntoKey(fcNaN, fcNormal):
2504 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2505 case PackCategoriesIntoKey(fcNaN, fcNaN):
2506 case PackCategoriesIntoKey(fcZero, fcNaN):
2507 case PackCategoriesIntoKey(fcNormal, fcNaN):
2508 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2509 return cmpUnordered;
2510
2511 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2512 case PackCategoriesIntoKey(fcInfinity, fcZero):
2513 case PackCategoriesIntoKey(fcNormal, fcZero):
2514 if (sign)
2515 return cmpLessThan;
2516 else
2517 return cmpGreaterThan;
2518
2519 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2520 case PackCategoriesIntoKey(fcZero, fcInfinity):
2521 case PackCategoriesIntoKey(fcZero, fcNormal):
2522 if (rhs.sign)
2523 return cmpGreaterThan;
2524 else
2525 return cmpLessThan;
2526
2527 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2528 if (sign == rhs.sign)
2529 return cmpEqual;
2530 else if (sign)
2531 return cmpLessThan;
2532 else
2533 return cmpGreaterThan;
2534
2535 case PackCategoriesIntoKey(fcZero, fcZero):
2536 return cmpEqual;
2537
2538 case PackCategoriesIntoKey(fcNormal, fcNormal):
2539 break;
2540 }
2541
2542 /* Two normal numbers. Do they have the same sign? */
2543 if (sign != rhs.sign) {
2544 if (sign)
2545 result = cmpLessThan;
2546 else
2547 result = cmpGreaterThan;
2548 } else {
2549 /* Compare absolute values; invert result if negative. */
2550 result = compareAbsoluteValue(rhs);
2551
2552 if (sign) {
2553 if (result == cmpLessThan)
2554 result = cmpGreaterThan;
2555 else if (result == cmpGreaterThan)
2556 result = cmpLessThan;
2557 }
2558 }
2559
2560 return result;
2561}
2562
2563/// IEEEFloat::convert - convert a value of one floating point type to another.
2564/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2565/// records whether the transformation lost information, i.e. whether
2566/// converting the result back to the original type will produce the
2567/// original value (this is almost the same as return value==fsOK, but there
2568/// are edge cases where this is not so).
2569
2570APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2571 roundingMode rounding_mode,
2572 bool *losesInfo) {
2573 lostFraction lostFraction;
2574 unsigned int newPartCount, oldPartCount;
2575 opStatus fs;
2576 int shift;
2577 const fltSemantics &fromSemantics = *semantics;
2578 bool is_signaling = isSignaling();
2579
2580 lostFraction = lfExactlyZero;
2581 newPartCount = partCountForBits(bits: toSemantics.precision + 1);
2582 oldPartCount = partCount();
2583 shift = toSemantics.precision - fromSemantics.precision;
2584
2585 bool X86SpecialNan = false;
2586 if (&fromSemantics == &semX87DoubleExtended &&
2587 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2588 (!(*significandParts() & 0x8000000000000000ULL) ||
2589 !(*significandParts() & 0x4000000000000000ULL))) {
2590 // x86 has some unusual NaNs which cannot be represented in any other
2591 // format; note them here.
2592 X86SpecialNan = true;
2593 }
2594
2595 // If this is a truncation of a denormal number, and the target semantics
2596 // has larger exponent range than the source semantics (this can happen
2597 // when truncating from PowerPC double-double to double format), the
2598 // right shift could lose result mantissa bits. Adjust exponent instead
2599 // of performing excessive shift.
2600 // Also do a similar trick in case shifting denormal would produce zero
2601 // significand as this case isn't handled correctly by normalize.
2602 if (shift < 0 && isFiniteNonZero()) {
2603 int omsb = significandMSB() + 1;
2604 int exponentChange = omsb - fromSemantics.precision;
2605 if (exponent + exponentChange < toSemantics.minExponent)
2606 exponentChange = toSemantics.minExponent - exponent;
2607 if (exponentChange < shift)
2608 exponentChange = shift;
2609 if (exponentChange < 0) {
2610 shift -= exponentChange;
2611 exponent += exponentChange;
2612 } else if (omsb <= -shift) {
2613 exponentChange = omsb + shift - 1; // leave at least one bit set
2614 shift -= exponentChange;
2615 exponent += exponentChange;
2616 }
2617 }
2618
2619 // If this is a truncation, perform the shift before we narrow the storage.
2620 if (shift < 0 && (isFiniteNonZero() ||
2621 (category == fcNaN && semantics->nonFiniteBehavior !=
2622 fltNonfiniteBehavior::NanOnly)))
2623 lostFraction = shiftRight(dst: significandParts(), parts: oldPartCount, bits: -shift);
2624
2625 // Fix the storage so it can hold to new value.
2626 if (newPartCount > oldPartCount) {
2627 // The new type requires more storage; make it available.
2628 integerPart *newParts;
2629 newParts = new integerPart[newPartCount];
2630 APInt::tcSet(newParts, 0, newPartCount);
2631 if (isFiniteNonZero() || category==fcNaN)
2632 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2633 freeSignificand();
2634 significand.parts = newParts;
2635 } else if (newPartCount == 1 && oldPartCount != 1) {
2636 // Switch to built-in storage for a single part.
2637 integerPart newPart = 0;
2638 if (isFiniteNonZero() || category==fcNaN)
2639 newPart = significandParts()[0];
2640 freeSignificand();
2641 significand.part = newPart;
2642 }
2643
2644 // Now that we have the right storage, switch the semantics.
2645 semantics = &toSemantics;
2646
2647 // If this is an extension, perform the shift now that the storage is
2648 // available.
2649 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2650 APInt::tcShiftLeft(significandParts(), Words: newPartCount, Count: shift);
2651
2652 if (isFiniteNonZero()) {
2653 fs = normalize(rounding_mode, lost_fraction: lostFraction);
2654 *losesInfo = (fs != opOK);
2655 } else if (category == fcNaN) {
2656 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2657 *losesInfo =
2658 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2659 makeNaN(SNaN: false, Negative: sign);
2660 return is_signaling ? opInvalidOp : opOK;
2661 }
2662
2663 // If NaN is negative zero, we need to create a new NaN to avoid converting
2664 // NaN to -Inf.
2665 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2666 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2667 makeNaN(SNaN: false, Negative: false);
2668
2669 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2670
2671 // For x87 extended precision, we want to make a NaN, not a special NaN if
2672 // the input wasn't special either.
2673 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2674 APInt::tcSetBit(significandParts(), bit: semantics->precision - 1);
2675
2676 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2677 // This also guarantees that a sNaN does not become Inf on a truncation
2678 // that loses all payload bits.
2679 if (is_signaling) {
2680 makeQuiet();
2681 fs = opInvalidOp;
2682 } else {
2683 fs = opOK;
2684 }
2685 } else if (category == fcInfinity &&
2686 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2687 makeNaN(SNaN: false, Negative: sign);
2688 *losesInfo = true;
2689 fs = opInexact;
2690 } else if (category == fcZero &&
2691 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2692 // Negative zero loses info, but positive zero doesn't.
2693 *losesInfo =
2694 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2695 fs = *losesInfo ? opInexact : opOK;
2696 // NaN is negative zero means -0 -> +0, which can lose information
2697 sign = false;
2698 } else {
2699 *losesInfo = false;
2700 fs = opOK;
2701 }
2702
2703 if (category == fcZero && !semantics->hasZero)
2704 makeSmallestNormalized(Negative: false);
2705 return fs;
2706}
2707
2708/* Convert a floating point number to an integer according to the
2709 rounding mode. If the rounded integer value is out of range this
2710 returns an invalid operation exception and the contents of the
2711 destination parts are unspecified. If the rounded value is in
2712 range but the floating point number is not the exact integer, the C
2713 standard doesn't require an inexact exception to be raised. IEEE
2714 854 does require it so we do that.
2715
2716 Note that for conversions to integer type the C standard requires
2717 round-to-zero to always be used. */
2718APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2719 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2720 roundingMode rounding_mode, bool *isExact) const {
2721 lostFraction lost_fraction;
2722 const integerPart *src;
2723 unsigned int dstPartsCount, truncatedBits;
2724
2725 *isExact = false;
2726
2727 /* Handle the three special cases first. */
2728 if (category == fcInfinity || category == fcNaN)
2729 return opInvalidOp;
2730
2731 dstPartsCount = partCountForBits(bits: width);
2732 assert(dstPartsCount <= parts.size() && "Integer too big");
2733
2734 if (category == fcZero) {
2735 APInt::tcSet(parts.data(), 0, dstPartsCount);
2736 // Negative zero can't be represented as an int.
2737 *isExact = !sign;
2738 return opOK;
2739 }
2740
2741 src = significandParts();
2742
2743 /* Step 1: place our absolute value, with any fraction truncated, in
2744 the destination. */
2745 if (exponent < 0) {
2746 /* Our absolute value is less than one; truncate everything. */
2747 APInt::tcSet(parts.data(), 0, dstPartsCount);
2748 /* For exponent -1 the integer bit represents .5, look at that.
2749 For smaller exponents leftmost truncated bit is 0. */
2750 truncatedBits = semantics->precision -1U - exponent;
2751 } else {
2752 /* We want the most significant (exponent + 1) bits; the rest are
2753 truncated. */
2754 unsigned int bits = exponent + 1U;
2755
2756 /* Hopelessly large in magnitude? */
2757 if (bits > width)
2758 return opInvalidOp;
2759
2760 if (bits < semantics->precision) {
2761 /* We truncate (semantics->precision - bits) bits. */
2762 truncatedBits = semantics->precision - bits;
2763 APInt::tcExtract(parts.data(), dstCount: dstPartsCount, src, srcBits: bits, srcLSB: truncatedBits);
2764 } else {
2765 /* We want at least as many bits as are available. */
2766 APInt::tcExtract(parts.data(), dstCount: dstPartsCount, src, srcBits: semantics->precision,
2767 srcLSB: 0);
2768 APInt::tcShiftLeft(parts.data(), Words: dstPartsCount,
2769 Count: bits - semantics->precision);
2770 truncatedBits = 0;
2771 }
2772 }
2773
2774 /* Step 2: work out any lost fraction, and increment the absolute
2775 value if we would round away from zero. */
2776 if (truncatedBits) {
2777 lost_fraction = lostFractionThroughTruncation(parts: src, partCount: partCount(),
2778 bits: truncatedBits);
2779 if (lost_fraction != lfExactlyZero &&
2780 roundAwayFromZero(rounding_mode, lost_fraction, bit: truncatedBits)) {
2781 if (APInt::tcIncrement(dst: parts.data(), parts: dstPartsCount))
2782 return opInvalidOp; /* Overflow. */
2783 }
2784 } else {
2785 lost_fraction = lfExactlyZero;
2786 }
2787
2788 /* Step 3: check if we fit in the destination. */
2789 unsigned int omsb = APInt::tcMSB(parts: parts.data(), n: dstPartsCount) + 1;
2790
2791 if (sign) {
2792 if (!isSigned) {
2793 /* Negative numbers cannot be represented as unsigned. */
2794 if (omsb != 0)
2795 return opInvalidOp;
2796 } else {
2797 /* It takes omsb bits to represent the unsigned integer value.
2798 We lose a bit for the sign, but care is needed as the
2799 maximally negative integer is a special case. */
2800 if (omsb == width &&
2801 APInt::tcLSB(parts.data(), n: dstPartsCount) + 1 != omsb)
2802 return opInvalidOp;
2803
2804 /* This case can happen because of rounding. */
2805 if (omsb > width)
2806 return opInvalidOp;
2807 }
2808
2809 APInt::tcNegate (parts.data(), dstPartsCount);
2810 } else {
2811 if (omsb >= width + !isSigned)
2812 return opInvalidOp;
2813 }
2814
2815 if (lost_fraction == lfExactlyZero) {
2816 *isExact = true;
2817 return opOK;
2818 }
2819 return opInexact;
2820}
2821
2822/* Same as convertToSignExtendedInteger, except we provide
2823 deterministic values in case of an invalid operation exception,
2824 namely zero for NaNs and the minimal or maximal value respectively
2825 for underflow or overflow.
2826 The *isExact output tells whether the result is exact, in the sense
2827 that converting it back to the original floating point type produces
2828 the original value. This is almost equivalent to result==opOK,
2829 except for negative zeroes.
2830*/
2831APFloat::opStatus
2832IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2833 unsigned int width, bool isSigned,
2834 roundingMode rounding_mode, bool *isExact) const {
2835 opStatus fs;
2836
2837 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2838 isExact);
2839
2840 if (fs == opInvalidOp) {
2841 unsigned int bits, dstPartsCount;
2842
2843 dstPartsCount = partCountForBits(bits: width);
2844 assert(dstPartsCount <= parts.size() && "Integer too big");
2845
2846 if (category == fcNaN)
2847 bits = 0;
2848 else if (sign)
2849 bits = isSigned;
2850 else
2851 bits = width - isSigned;
2852
2853 tcSetLeastSignificantBits(dst: parts.data(), parts: dstPartsCount, bits);
2854 if (sign && isSigned)
2855 APInt::tcShiftLeft(parts.data(), Words: dstPartsCount, Count: width - 1);
2856 }
2857
2858 return fs;
2859}
2860
2861/* Convert an unsigned integer SRC to a floating point number,
2862 rounding according to ROUNDING_MODE. The sign of the floating
2863 point number is not modified. */
2864APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2865 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2866 unsigned int omsb, precision, dstCount;
2867 integerPart *dst;
2868 lostFraction lost_fraction;
2869
2870 category = fcNormal;
2871 omsb = APInt::tcMSB(parts: src, n: srcCount) + 1;
2872 dst = significandParts();
2873 dstCount = partCount();
2874 precision = semantics->precision;
2875
2876 /* We want the most significant PRECISION bits of SRC. There may not
2877 be that many; extract what we can. */
2878 if (precision <= omsb) {
2879 exponent = omsb - 1;
2880 lost_fraction = lostFractionThroughTruncation(parts: src, partCount: srcCount,
2881 bits: omsb - precision);
2882 APInt::tcExtract(dst, dstCount, src, srcBits: precision, srcLSB: omsb - precision);
2883 } else {
2884 exponent = precision - 1;
2885 lost_fraction = lfExactlyZero;
2886 APInt::tcExtract(dst, dstCount, src, srcBits: omsb, srcLSB: 0);
2887 }
2888
2889 return normalize(rounding_mode, lost_fraction);
2890}
2891
2892APFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2893 roundingMode rounding_mode) {
2894 unsigned int partCount = Val.getNumWords();
2895 APInt api = Val;
2896
2897 sign = false;
2898 if (isSigned && api.isNegative()) {
2899 sign = true;
2900 api = -api;
2901 }
2902
2903 return convertFromUnsignedParts(src: api.getRawData(), srcCount: partCount, rounding_mode);
2904}
2905
2906/* Convert a two's complement integer SRC to a floating point number,
2907 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2908 integer is signed, in which case it must be sign-extended. */
2909APFloat::opStatus
2910IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2911 unsigned int srcCount, bool isSigned,
2912 roundingMode rounding_mode) {
2913 opStatus status;
2914
2915 if (isSigned &&
2916 APInt::tcExtractBit(src, bit: srcCount * integerPartWidth - 1)) {
2917 integerPart *copy;
2918
2919 /* If we're signed and negative negate a copy. */
2920 sign = true;
2921 copy = new integerPart[srcCount];
2922 APInt::tcAssign(copy, src, srcCount);
2923 APInt::tcNegate(copy, srcCount);
2924 status = convertFromUnsignedParts(src: copy, srcCount, rounding_mode);
2925 delete [] copy;
2926 } else {
2927 sign = false;
2928 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2929 }
2930
2931 return status;
2932}
2933
2934/* FIXME: should this just take a const APInt reference? */
2935APFloat::opStatus
2936IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2937 unsigned int width, bool isSigned,
2938 roundingMode rounding_mode) {
2939 unsigned int partCount = partCountForBits(bits: width);
2940 APInt api = APInt(width, ArrayRef(parts, partCount));
2941
2942 sign = false;
2943 if (isSigned && APInt::tcExtractBit(parts, bit: width - 1)) {
2944 sign = true;
2945 api = -api;
2946 }
2947
2948 return convertFromUnsignedParts(src: api.getRawData(), srcCount: partCount, rounding_mode);
2949}
2950
2951Expected<APFloat::opStatus>
2952IEEEFloat::convertFromHexadecimalString(StringRef s,
2953 roundingMode rounding_mode) {
2954 lostFraction lost_fraction = lfExactlyZero;
2955
2956 category = fcNormal;
2957 zeroSignificand();
2958 exponent = 0;
2959
2960 integerPart *significand = significandParts();
2961 unsigned partsCount = partCount();
2962 unsigned bitPos = partsCount * integerPartWidth;
2963 bool computedTrailingFraction = false;
2964
2965 // Skip leading zeroes and any (hexa)decimal point.
2966 StringRef::iterator begin = s.begin();
2967 StringRef::iterator end = s.end();
2968 StringRef::iterator dot;
2969 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, dot: &dot);
2970 if (!PtrOrErr)
2971 return PtrOrErr.takeError();
2972 StringRef::iterator p = *PtrOrErr;
2973 StringRef::iterator firstSignificantDigit = p;
2974
2975 while (p != end) {
2976 integerPart hex_value;
2977
2978 if (*p == '.') {
2979 if (dot != end)
2980 return createError(Err: "String contains multiple dots");
2981 dot = p++;
2982 continue;
2983 }
2984
2985 hex_value = hexDigitValue(C: *p);
2986 if (hex_value == UINT_MAX)
2987 break;
2988
2989 p++;
2990
2991 // Store the number while we have space.
2992 if (bitPos) {
2993 bitPos -= 4;
2994 hex_value <<= bitPos % integerPartWidth;
2995 significand[bitPos / integerPartWidth] |= hex_value;
2996 } else if (!computedTrailingFraction) {
2997 auto FractOrErr = trailingHexadecimalFraction(p, end, digitValue: hex_value);
2998 if (!FractOrErr)
2999 return FractOrErr.takeError();
3000 lost_fraction = *FractOrErr;
3001 computedTrailingFraction = true;
3002 }
3003 }
3004
3005 /* Hex floats require an exponent but not a hexadecimal point. */
3006 if (p == end)
3007 return createError(Err: "Hex strings require an exponent");
3008 if (*p != 'p' && *p != 'P')
3009 return createError(Err: "Invalid character in significand");
3010 if (p == begin)
3011 return createError(Err: "Significand has no digits");
3012 if (dot != end && p - begin == 1)
3013 return createError(Err: "Significand has no digits");
3014
3015 /* Ignore the exponent if we are zero. */
3016 if (p != firstSignificantDigit) {
3017 int expAdjustment;
3018
3019 /* Implicit hexadecimal point? */
3020 if (dot == end)
3021 dot = p;
3022
3023 /* Calculate the exponent adjustment implicit in the number of
3024 significant digits. */
3025 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
3026 if (expAdjustment < 0)
3027 expAdjustment++;
3028 expAdjustment = expAdjustment * 4 - 1;
3029
3030 /* Adjust for writing the significand starting at the most
3031 significant nibble. */
3032 expAdjustment += semantics->precision;
3033 expAdjustment -= partsCount * integerPartWidth;
3034
3035 /* Adjust for the given exponent. */
3036 auto ExpOrErr = totalExponent(p: p + 1, end, exponentAdjustment: expAdjustment);
3037 if (!ExpOrErr)
3038 return ExpOrErr.takeError();
3039 exponent = *ExpOrErr;
3040 }
3041
3042 return normalize(rounding_mode, lost_fraction);
3043}
3044
3045APFloat::opStatus
3046IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
3047 unsigned sigPartCount, int exp,
3048 roundingMode rounding_mode) {
3049 unsigned int parts, pow5PartCount;
3050 fltSemantics calcSemantics = { .maxExponent: 32767, .minExponent: -32767, .precision: 0, .sizeInBits: 0 };
3051 integerPart pow5Parts[maxPowerOfFiveParts];
3052 bool isNearest;
3053
3054 isNearest = (rounding_mode == rmNearestTiesToEven ||
3055 rounding_mode == rmNearestTiesToAway);
3056
3057 parts = partCountForBits(bits: semantics->precision + 11);
3058
3059 /* Calculate pow(5, abs(exp)). */
3060 pow5PartCount = powerOf5(dst: pow5Parts, power: exp >= 0 ? exp: -exp);
3061
3062 for (;; parts *= 2) {
3063 opStatus sigStatus, powStatus;
3064 unsigned int excessPrecision, truncatedBits;
3065
3066 calcSemantics.precision = parts * integerPartWidth - 1;
3067 excessPrecision = calcSemantics.precision - semantics->precision;
3068 truncatedBits = excessPrecision;
3069
3070 IEEEFloat decSig(calcSemantics, uninitialized);
3071 decSig.makeZero(Neg: sign);
3072 IEEEFloat pow5(calcSemantics);
3073
3074 sigStatus = decSig.convertFromUnsignedParts(src: decSigParts, srcCount: sigPartCount,
3075 rounding_mode: rmNearestTiesToEven);
3076 powStatus = pow5.convertFromUnsignedParts(src: pow5Parts, srcCount: pow5PartCount,
3077 rounding_mode: rmNearestTiesToEven);
3078 /* Add exp, as 10^n = 5^n * 2^n. */
3079 decSig.exponent += exp;
3080
3081 lostFraction calcLostFraction;
3082 integerPart HUerr, HUdistance;
3083 unsigned int powHUerr;
3084
3085 if (exp >= 0) {
3086 /* multiplySignificand leaves the precision-th bit set to 1. */
3087 calcLostFraction = decSig.multiplySignificand(rhs: pow5);
3088 powHUerr = powStatus != opOK;
3089 } else {
3090 calcLostFraction = decSig.divideSignificand(rhs: pow5);
3091 /* Denormal numbers have less precision. */
3092 if (decSig.exponent < semantics->minExponent) {
3093 excessPrecision += (semantics->minExponent - decSig.exponent);
3094 truncatedBits = excessPrecision;
3095 if (excessPrecision > calcSemantics.precision)
3096 excessPrecision = calcSemantics.precision;
3097 }
3098 /* Extra half-ulp lost in reciprocal of exponent. */
3099 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
3100 }
3101
3102 /* Both multiplySignificand and divideSignificand return the
3103 result with the integer bit set. */
3104 assert(APInt::tcExtractBit
3105 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
3106
3107 HUerr = HUerrBound(inexactMultiply: calcLostFraction != lfExactlyZero, HUerr1: sigStatus != opOK,
3108 HUerr2: powHUerr);
3109 HUdistance = 2 * ulpsFromBoundary(parts: decSig.significandParts(),
3110 bits: excessPrecision, isNearest);
3111
3112 /* Are we guaranteed to round correctly if we truncate? */
3113 if (HUdistance >= HUerr) {
3114 APInt::tcExtract(significandParts(), dstCount: partCount(), decSig.significandParts(),
3115 srcBits: calcSemantics.precision - excessPrecision,
3116 srcLSB: excessPrecision);
3117 /* Take the exponent of decSig. If we tcExtract-ed less bits
3118 above we must adjust our exponent to compensate for the
3119 implicit right shift. */
3120 exponent = (decSig.exponent + semantics->precision
3121 - (calcSemantics.precision - excessPrecision));
3122 calcLostFraction = lostFractionThroughTruncation(parts: decSig.significandParts(),
3123 partCount: decSig.partCount(),
3124 bits: truncatedBits);
3125 return normalize(rounding_mode, lost_fraction: calcLostFraction);
3126 }
3127 }
3128}
3129
3130Expected<APFloat::opStatus>
3131IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3132 decimalInfo D;
3133 opStatus fs;
3134
3135 /* Scan the text. */
3136 StringRef::iterator p = str.begin();
3137 if (Error Err = interpretDecimal(begin: p, end: str.end(), D: &D))
3138 return std::move(Err);
3139
3140 /* Handle the quick cases. First the case of no significant digits,
3141 i.e. zero, and then exponents that are obviously too large or too
3142 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3143 definitely overflows if
3144
3145 (exp - 1) * L >= maxExponent
3146
3147 and definitely underflows to zero where
3148
3149 (exp + 1) * L <= minExponent - precision
3150
3151 With integer arithmetic the tightest bounds for L are
3152
3153 93/28 < L < 196/59 [ numerator <= 256 ]
3154 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3155 */
3156
3157 // Test if we have a zero number allowing for strings with no null terminators
3158 // and zero decimals with non-zero exponents.
3159 //
3160 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3161 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3162 // be at most one dot. On the other hand, if we have a zero with a non-zero
3163 // exponent, then we know that D.firstSigDigit will be non-numeric.
3164 if (D.firstSigDigit == str.end() || decDigitValue(c: *D.firstSigDigit) >= 10U) {
3165 category = fcZero;
3166 fs = opOK;
3167 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3168 sign = false;
3169 if (!semantics->hasZero)
3170 makeSmallestNormalized(Negative: false);
3171
3172 /* Check whether the normalized exponent is high enough to overflow
3173 max during the log-rebasing in the max-exponent check below. */
3174 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3175 fs = handleOverflow(rounding_mode);
3176
3177 /* If it wasn't, then it also wasn't high enough to overflow max
3178 during the log-rebasing in the min-exponent check. Check that it
3179 won't overflow min in either check, then perform the min-exponent
3180 check. */
3181 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3182 (D.normalizedExponent + 1) * 28738 <=
3183 8651 * (semantics->minExponent - (int) semantics->precision)) {
3184 /* Underflow to zero and round. */
3185 category = fcNormal;
3186 zeroSignificand();
3187 fs = normalize(rounding_mode, lost_fraction: lfLessThanHalf);
3188
3189 /* We can finally safely perform the max-exponent check. */
3190 } else if ((D.normalizedExponent - 1) * 42039
3191 >= 12655 * semantics->maxExponent) {
3192 /* Overflow and round. */
3193 fs = handleOverflow(rounding_mode);
3194 } else {
3195 integerPart *decSignificand;
3196 unsigned int partCount;
3197
3198 /* A tight upper bound on number of bits required to hold an
3199 N-digit decimal integer is N * 196 / 59. Allocate enough space
3200 to hold the full significand, and an extra part required by
3201 tcMultiplyPart. */
3202 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3203 partCount = partCountForBits(bits: 1 + 196 * partCount / 59);
3204 decSignificand = new integerPart[partCount + 1];
3205 partCount = 0;
3206
3207 /* Convert to binary efficiently - we do almost all multiplication
3208 in an integerPart. When this would overflow do we do a single
3209 bignum multiplication, and then revert again to multiplication
3210 in an integerPart. */
3211 do {
3212 integerPart decValue, val, multiplier;
3213
3214 val = 0;
3215 multiplier = 1;
3216
3217 do {
3218 if (*p == '.') {
3219 p++;
3220 if (p == str.end()) {
3221 break;
3222 }
3223 }
3224 decValue = decDigitValue(c: *p++);
3225 if (decValue >= 10U) {
3226 delete[] decSignificand;
3227 return createError(Err: "Invalid character in significand");
3228 }
3229 multiplier *= 10;
3230 val = val * 10 + decValue;
3231 /* The maximum number that can be multiplied by ten with any
3232 digit added without overflowing an integerPart. */
3233 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3234
3235 /* Multiply out the current part. */
3236 APInt::tcMultiplyPart(dst: decSignificand, src: decSignificand, multiplier, carry: val,
3237 srcParts: partCount, dstParts: partCount + 1, add: false);
3238
3239 /* If we used another part (likely but not guaranteed), increase
3240 the count. */
3241 if (decSignificand[partCount])
3242 partCount++;
3243 } while (p <= D.lastSigDigit);
3244
3245 category = fcNormal;
3246 fs = roundSignificandWithExponent(decSigParts: decSignificand, sigPartCount: partCount,
3247 exp: D.exponent, rounding_mode);
3248
3249 delete [] decSignificand;
3250 }
3251
3252 return fs;
3253}
3254
3255bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3256 const size_t MIN_NAME_SIZE = 3;
3257
3258 if (str.size() < MIN_NAME_SIZE)
3259 return false;
3260
3261 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3262 makeInf(Neg: false);
3263 return true;
3264 }
3265
3266 bool IsNegative = str.consume_front(Prefix: "-");
3267 if (IsNegative) {
3268 if (str.size() < MIN_NAME_SIZE)
3269 return false;
3270
3271 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3272 makeInf(Neg: true);
3273 return true;
3274 }
3275 }
3276
3277 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3278 bool IsSignaling = str.consume_front_insensitive(Prefix: "s");
3279 if (IsSignaling) {
3280 if (str.size() < MIN_NAME_SIZE)
3281 return false;
3282 }
3283
3284 if (str.consume_front(Prefix: "nan") || str.consume_front(Prefix: "NaN")) {
3285 // A NaN without payload.
3286 if (str.empty()) {
3287 makeNaN(SNaN: IsSignaling, Negative: IsNegative);
3288 return true;
3289 }
3290
3291 // Allow the payload to be inside parentheses.
3292 if (str.front() == '(') {
3293 // Parentheses should be balanced (and not empty).
3294 if (str.size() <= 2 || str.back() != ')')
3295 return false;
3296
3297 str = str.slice(Start: 1, End: str.size() - 1);
3298 }
3299
3300 // Determine the payload number's radix.
3301 unsigned Radix = 10;
3302 if (str[0] == '0') {
3303 if (str.size() > 1 && tolower(c: str[1]) == 'x') {
3304 str = str.drop_front(N: 2);
3305 Radix = 16;
3306 } else {
3307 Radix = 8;
3308 }
3309 }
3310
3311 // Parse the payload and make the NaN.
3312 APInt Payload;
3313 if (!str.getAsInteger(Radix, Result&: Payload)) {
3314 makeNaN(SNaN: IsSignaling, Negative: IsNegative, fill: &Payload);
3315 return true;
3316 }
3317 }
3318
3319 return false;
3320}
3321
3322Expected<APFloat::opStatus>
3323IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3324 if (str.empty())
3325 return createError(Err: "Invalid string length");
3326
3327 // Handle special cases.
3328 if (convertFromStringSpecials(str))
3329 return opOK;
3330
3331 /* Handle a leading minus sign. */
3332 StringRef::iterator p = str.begin();
3333 size_t slen = str.size();
3334 sign = *p == '-' ? 1 : 0;
3335 if (sign && !semantics->hasSignedRepr)
3336 llvm_unreachable(
3337 "This floating point format does not support signed values");
3338
3339 if (*p == '-' || *p == '+') {
3340 p++;
3341 slen--;
3342 if (!slen)
3343 return createError(Err: "String has no digits");
3344 }
3345
3346 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3347 if (slen == 2)
3348 return createError(Err: "Invalid string");
3349 return convertFromHexadecimalString(s: StringRef(p + 2, slen - 2),
3350 rounding_mode);
3351 }
3352
3353 return convertFromDecimalString(str: StringRef(p, slen), rounding_mode);
3354}
3355
3356/* Write out a hexadecimal representation of the floating point value
3357 to DST, which must be of sufficient size, in the C99 form
3358 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3359 excluding the terminating NUL.
3360
3361 If UPPERCASE, the output is in upper case, otherwise in lower case.
3362
3363 HEXDIGITS digits appear altogether, rounding the value if
3364 necessary. If HEXDIGITS is 0, the minimal precision to display the
3365 number precisely is used instead. If nothing would appear after
3366 the decimal point it is suppressed.
3367
3368 The decimal exponent is always printed and has at least one digit.
3369 Zero values display an exponent of zero. Infinities and NaNs
3370 appear as "infinity" or "nan" respectively.
3371
3372 The above rules are as specified by C99. There is ambiguity about
3373 what the leading hexadecimal digit should be. This implementation
3374 uses whatever is necessary so that the exponent is displayed as
3375 stored. This implies the exponent will fall within the IEEE format
3376 range, and the leading hexadecimal digit will be 0 (for denormals),
3377 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3378 any other digits zero).
3379*/
3380unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3381 bool upperCase,
3382 roundingMode rounding_mode) const {
3383 char *p;
3384
3385 p = dst;
3386 if (sign)
3387 *dst++ = '-';
3388
3389 switch (category) {
3390 case fcInfinity:
3391 memcpy (dest: dst, src: upperCase ? infinityU: infinityL, n: sizeof infinityU - 1);
3392 dst += sizeof infinityL - 1;
3393 break;
3394
3395 case fcNaN:
3396 memcpy (dest: dst, src: upperCase ? NaNU: NaNL, n: sizeof NaNU - 1);
3397 dst += sizeof NaNU - 1;
3398 break;
3399
3400 case fcZero:
3401 *dst++ = '0';
3402 *dst++ = upperCase ? 'X': 'x';
3403 *dst++ = '0';
3404 if (hexDigits > 1) {
3405 *dst++ = '.';
3406 memset (s: dst, c: '0', n: hexDigits - 1);
3407 dst += hexDigits - 1;
3408 }
3409 *dst++ = upperCase ? 'P': 'p';
3410 *dst++ = '0';
3411 break;
3412
3413 case fcNormal:
3414 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3415 break;
3416 }
3417
3418 *dst = 0;
3419
3420 return static_cast<unsigned int>(dst - p);
3421}
3422
3423/* Does the hard work of outputting the correctly rounded hexadecimal
3424 form of a normal floating point number with the specified number of
3425 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3426 digits necessary to print the value precisely is output. */
3427char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3428 bool upperCase,
3429 roundingMode rounding_mode) const {
3430 unsigned int count, valueBits, shift, partsCount, outputDigits;
3431 const char *hexDigitChars;
3432 const integerPart *significand;
3433 char *p;
3434 bool roundUp;
3435
3436 *dst++ = '0';
3437 *dst++ = upperCase ? 'X': 'x';
3438
3439 roundUp = false;
3440 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3441
3442 significand = significandParts();
3443 partsCount = partCount();
3444
3445 /* +3 because the first digit only uses the single integer bit, so
3446 we have 3 virtual zero most-significant-bits. */
3447 valueBits = semantics->precision + 3;
3448 shift = integerPartWidth - valueBits % integerPartWidth;
3449
3450 /* The natural number of digits required ignoring trailing
3451 insignificant zeroes. */
3452 outputDigits = (valueBits - significandLSB () + 3) / 4;
3453
3454 /* hexDigits of zero means use the required number for the
3455 precision. Otherwise, see if we are truncating. If we are,
3456 find out if we need to round away from zero. */
3457 if (hexDigits) {
3458 if (hexDigits < outputDigits) {
3459 /* We are dropping non-zero bits, so need to check how to round.
3460 "bits" is the number of dropped bits. */
3461 unsigned int bits;
3462 lostFraction fraction;
3463
3464 bits = valueBits - hexDigits * 4;
3465 fraction = lostFractionThroughTruncation (parts: significand, partCount: partsCount, bits);
3466 roundUp = roundAwayFromZero(rounding_mode, lost_fraction: fraction, bit: bits);
3467 }
3468 outputDigits = hexDigits;
3469 }
3470
3471 /* Write the digits consecutively, and start writing in the location
3472 of the hexadecimal point. We move the most significant digit
3473 left and add the hexadecimal point later. */
3474 p = ++dst;
3475
3476 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3477
3478 while (outputDigits && count) {
3479 integerPart part;
3480
3481 /* Put the most significant integerPartWidth bits in "part". */
3482 if (--count == partsCount)
3483 part = 0; /* An imaginary higher zero part. */
3484 else
3485 part = significand[count] << shift;
3486
3487 if (count && shift)
3488 part |= significand[count - 1] >> (integerPartWidth - shift);
3489
3490 /* Convert as much of "part" to hexdigits as we can. */
3491 unsigned int curDigits = integerPartWidth / 4;
3492
3493 if (curDigits > outputDigits)
3494 curDigits = outputDigits;
3495 dst += partAsHex (dst, part, count: curDigits, hexDigitChars);
3496 outputDigits -= curDigits;
3497 }
3498
3499 if (roundUp) {
3500 char *q = dst;
3501
3502 /* Note that hexDigitChars has a trailing '0'. */
3503 do {
3504 q--;
3505 *q = hexDigitChars[hexDigitValue (C: *q) + 1];
3506 } while (*q == '0');
3507 assert(q >= p);
3508 } else {
3509 /* Add trailing zeroes. */
3510 memset (s: dst, c: '0', n: outputDigits);
3511 dst += outputDigits;
3512 }
3513
3514 /* Move the most significant digit to before the point, and if there
3515 is something after the decimal point add it. This must come
3516 after rounding above. */
3517 p[-1] = p[0];
3518 if (dst -1 == p)
3519 dst--;
3520 else
3521 p[0] = '.';
3522
3523 /* Finally output the exponent. */
3524 *dst++ = upperCase ? 'P': 'p';
3525
3526 return writeSignedDecimal (dst, value: exponent);
3527}
3528
3529hash_code hash_value(const IEEEFloat &Arg) {
3530 if (!Arg.isFiniteNonZero())
3531 return hash_combine(args: (uint8_t)Arg.category,
3532 // NaN has no sign, fix it at zero.
3533 args: Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3534 args: Arg.semantics->precision);
3535
3536 // Normal floats need their exponent and significand hashed.
3537 return hash_combine(args: (uint8_t)Arg.category, args: (uint8_t)Arg.sign,
3538 args: Arg.semantics->precision, args: Arg.exponent,
3539 args: hash_combine_range(
3540 first: Arg.significandParts(),
3541 last: Arg.significandParts() + Arg.partCount()));
3542}
3543
3544// Conversion from APFloat to/from host float/double. It may eventually be
3545// possible to eliminate these and have everybody deal with APFloats, but that
3546// will take a while. This approach will not easily extend to long double.
3547// Current implementation requires integerPartWidth==64, which is correct at
3548// the moment but could be made more general.
3549
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3552
3553APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3554 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3555 assert(partCount()==2);
3556
3557 uint64_t myexponent, mysignificand;
3558
3559 if (isFiniteNonZero()) {
3560 myexponent = exponent+16383; //bias
3561 mysignificand = significandParts()[0];
3562 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3563 myexponent = 0; // denormal
3564 } else if (category==fcZero) {
3565 myexponent = 0;
3566 mysignificand = 0;
3567 } else if (category==fcInfinity) {
3568 myexponent = 0x7fff;
3569 mysignificand = 0x8000000000000000ULL;
3570 } else {
3571 assert(category == fcNaN && "Unknown category");
3572 myexponent = 0x7fff;
3573 mysignificand = significandParts()[0];
3574 }
3575
3576 uint64_t words[2];
3577 words[0] = mysignificand;
3578 words[1] = ((uint64_t)(sign & 1) << 15) |
3579 (myexponent & 0x7fffLL);
3580 return APInt(80, words);
3581}
3582
3583APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3584 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3585 assert(partCount()==2);
3586
3587 uint64_t words[2];
3588 opStatus fs;
3589 bool losesInfo;
3590
3591 // Convert number to double. To avoid spurious underflows, we re-
3592 // normalize against the "double" minExponent first, and only *then*
3593 // truncate the mantissa. The result of that second conversion
3594 // may be inexact, but should never underflow.
3595 // Declare fltSemantics before APFloat that uses it (and
3596 // saves pointer to it) to ensure correct destruction order.
3597 fltSemantics extendedSemantics = *semantics;
3598 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3599 IEEEFloat extended(*this);
3600 fs = extended.convert(toSemantics: extendedSemantics, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3601 assert(fs == opOK && !losesInfo);
3602 (void)fs;
3603
3604 IEEEFloat u(extended);
3605 fs = u.convert(toSemantics: semIEEEdouble, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3606 assert(fs == opOK || fs == opInexact);
3607 (void)fs;
3608 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3609
3610 // If conversion was exact or resulted in a special case, we're done;
3611 // just set the second double to zero. Otherwise, re-convert back to
3612 // the extended format and compute the difference. This now should
3613 // convert exactly to double.
3614 if (u.isFiniteNonZero() && losesInfo) {
3615 fs = u.convert(toSemantics: extendedSemantics, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3616 assert(fs == opOK && !losesInfo);
3617 (void)fs;
3618
3619 IEEEFloat v(extended);
3620 v.subtract(rhs: u, rounding_mode: rmNearestTiesToEven);
3621 fs = v.convert(toSemantics: semIEEEdouble, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3622 assert(fs == opOK && !losesInfo);
3623 (void)fs;
3624 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3625 } else {
3626 words[1] = 0;
3627 }
3628
3629 return APInt(128, words);
3630}
3631
3632template <const fltSemantics &S>
3633APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3634 assert(semantics == &S);
3635 const int bias =
3636 (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
3637 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3638 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3639 constexpr integerPart integer_bit =
3640 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3641 constexpr uint64_t significand_mask = integer_bit - 1;
3642 constexpr unsigned int exponent_bits =
3643 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3644 : S.sizeInBits;
3645 static_assert(exponent_bits < 64);
3646 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3647
3648 uint64_t myexponent;
3649 std::array<integerPart, partCountForBits(bits: trailing_significand_bits)>
3650 mysignificand;
3651
3652 if (isFiniteNonZero()) {
3653 myexponent = exponent + bias;
3654 std::copy_n(significandParts(), mysignificand.size(),
3655 mysignificand.begin());
3656 if (myexponent == 1 &&
3657 !(significandParts()[integer_bit_part] & integer_bit))
3658 myexponent = 0; // denormal
3659 } else if (category == fcZero) {
3660 if (!S.hasZero)
3661 llvm_unreachable("semantics does not support zero!");
3662 myexponent = ::exponentZero(semantics: S) + bias;
3663 mysignificand.fill(0);
3664 } else if (category == fcInfinity) {
3665 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3666 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3667 llvm_unreachable("semantics don't support inf!");
3668 myexponent = ::exponentInf(semantics: S) + bias;
3669 mysignificand.fill(0);
3670 } else {
3671 assert(category == fcNaN && "Unknown category!");
3672 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3673 llvm_unreachable("semantics don't support NaN!");
3674 myexponent = ::exponentNaN(semantics: S) + bias;
3675 std::copy_n(significandParts(), mysignificand.size(),
3676 mysignificand.begin());
3677 }
3678 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3679 auto words_iter =
3680 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3681 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3682 // Clear the integer bit.
3683 words[mysignificand.size() - 1] &= significand_mask;
3684 }
3685 std::fill(words_iter, words.end(), uint64_t{0});
3686 constexpr size_t last_word = words.size() - 1;
3687 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3688 << ((S.sizeInBits - 1) % 64);
3689 words[last_word] |= shifted_sign;
3690 uint64_t shifted_exponent = (myexponent & exponent_mask)
3691 << (trailing_significand_bits % 64);
3692 words[last_word] |= shifted_exponent;
3693 if constexpr (last_word == 0) {
3694 return APInt(S.sizeInBits, words[0]);
3695 }
3696 return APInt(S.sizeInBits, words);
3697}
3698
3699APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3700 assert(partCount() == 2);
3701 return convertIEEEFloatToAPInt<semIEEEquad>();
3702}
3703
3704APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3705 assert(partCount()==1);
3706 return convertIEEEFloatToAPInt<semIEEEdouble>();
3707}
3708
3709APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3710 assert(partCount()==1);
3711 return convertIEEEFloatToAPInt<semIEEEsingle>();
3712}
3713
3714APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3715 assert(partCount() == 1);
3716 return convertIEEEFloatToAPInt<semBFloat>();
3717}
3718
3719APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3720 assert(partCount()==1);
3721 return convertIEEEFloatToAPInt<semIEEEhalf>();
3722}
3723
3724APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3725 assert(partCount() == 1);
3726 return convertIEEEFloatToAPInt<semFloat8E5M2>();
3727}
3728
3729APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3730 assert(partCount() == 1);
3731 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3732}
3733
3734APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3735 assert(partCount() == 1);
3736 return convertIEEEFloatToAPInt<semFloat8E4M3>();
3737}
3738
3739APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3740 assert(partCount() == 1);
3741 return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3742}
3743
3744APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3745 assert(partCount() == 1);
3746 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3747}
3748
3749APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3750 assert(partCount() == 1);
3751 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3752}
3753
3754APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3755 assert(partCount() == 1);
3756 return convertIEEEFloatToAPInt<semFloat8E3M4>();
3757}
3758
3759APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3760 assert(partCount() == 1);
3761 return convertIEEEFloatToAPInt<semFloatTF32>();
3762}
3763
3764APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3765 assert(partCount() == 1);
3766 return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
3767}
3768
3769APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3770 assert(partCount() == 1);
3771 return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
3772}
3773
3774APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3775 assert(partCount() == 1);
3776 return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
3777}
3778
3779APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3780 assert(partCount() == 1);
3781 return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
3782}
3783
3784// This function creates an APInt that is just a bit map of the floating
3785// point constant as it would appear in memory. It is not a conversion,
3786// and treating the result as a normal integer is unlikely to be useful.
3787
3788APInt IEEEFloat::bitcastToAPInt() const {
3789 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3790 return convertHalfAPFloatToAPInt();
3791
3792 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3793 return convertBFloatAPFloatToAPInt();
3794
3795 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3796 return convertFloatAPFloatToAPInt();
3797
3798 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3799 return convertDoubleAPFloatToAPInt();
3800
3801 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3802 return convertQuadrupleAPFloatToAPInt();
3803
3804 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3805 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3806
3807 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3808 return convertFloat8E5M2APFloatToAPInt();
3809
3810 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3811 return convertFloat8E5M2FNUZAPFloatToAPInt();
3812
3813 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3814 return convertFloat8E4M3APFloatToAPInt();
3815
3816 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3817 return convertFloat8E4M3FNAPFloatToAPInt();
3818
3819 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3820 return convertFloat8E4M3FNUZAPFloatToAPInt();
3821
3822 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3823 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3824
3825 if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
3826 return convertFloat8E3M4APFloatToAPInt();
3827
3828 if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3829 return convertFloatTF32APFloatToAPInt();
3830
3831 if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU)
3832 return convertFloat8E8M0FNUAPFloatToAPInt();
3833
3834 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3835 return convertFloat6E3M2FNAPFloatToAPInt();
3836
3837 if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3838 return convertFloat6E2M3FNAPFloatToAPInt();
3839
3840 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
3841 return convertFloat4E2M1FNAPFloatToAPInt();
3842
3843 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3844 "unknown format!");
3845 return convertF80LongDoubleAPFloatToAPInt();
3846}
3847
3848float IEEEFloat::convertToFloat() const {
3849 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3850 "Float semantics are not IEEEsingle");
3851 APInt api = bitcastToAPInt();
3852 return api.bitsToFloat();
3853}
3854
3855double IEEEFloat::convertToDouble() const {
3856 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3857 "Float semantics are not IEEEdouble");
3858 APInt api = bitcastToAPInt();
3859 return api.bitsToDouble();
3860}
3861
3862#ifdef HAS_IEE754_FLOAT128
3863float128 IEEEFloat::convertToQuad() const {
3864 assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
3865 "Float semantics are not IEEEquads");
3866 APInt api = bitcastToAPInt();
3867 return api.bitsToQuad();
3868}
3869#endif
3870
3871/// Integer bit is explicit in this format. Intel hardware (387 and later)
3872/// does not support these bit patterns:
3873/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3874/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3875/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3876/// exponent = 0, integer bit 1 ("pseudodenormal")
3877/// At the moment, the first three are treated as NaNs, the last one as Normal.
3878void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3879 uint64_t i1 = api.getRawData()[0];
3880 uint64_t i2 = api.getRawData()[1];
3881 uint64_t myexponent = (i2 & 0x7fff);
3882 uint64_t mysignificand = i1;
3883 uint8_t myintegerbit = mysignificand >> 63;
3884
3885 initialize(ourSemantics: &semX87DoubleExtended);
3886 assert(partCount()==2);
3887
3888 sign = static_cast<unsigned int>(i2>>15);
3889 if (myexponent == 0 && mysignificand == 0) {
3890 makeZero(Neg: sign);
3891 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3892 makeInf(Neg: sign);
3893 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3894 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3895 category = fcNaN;
3896 exponent = exponentNaN();
3897 significandParts()[0] = mysignificand;
3898 significandParts()[1] = 0;
3899 } else {
3900 category = fcNormal;
3901 exponent = myexponent - 16383;
3902 significandParts()[0] = mysignificand;
3903 significandParts()[1] = 0;
3904 if (myexponent==0) // denormal
3905 exponent = -16382;
3906 }
3907}
3908
3909void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3910 uint64_t i1 = api.getRawData()[0];
3911 uint64_t i2 = api.getRawData()[1];
3912 opStatus fs;
3913 bool losesInfo;
3914
3915 // Get the first double and convert to our format.
3916 initFromDoubleAPInt(api: APInt(64, i1));
3917 fs = convert(toSemantics: semPPCDoubleDoubleLegacy, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3918 assert(fs == opOK && !losesInfo);
3919 (void)fs;
3920
3921 // Unless we have a special case, add in second double.
3922 if (isFiniteNonZero()) {
3923 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3924 fs = v.convert(toSemantics: semPPCDoubleDoubleLegacy, rounding_mode: rmNearestTiesToEven, losesInfo: &losesInfo);
3925 assert(fs == opOK && !losesInfo);
3926 (void)fs;
3927
3928 add(rhs: v, rounding_mode: rmNearestTiesToEven);
3929 }
3930}
3931
3932// The E8M0 format has the following characteristics:
3933// It is an 8-bit unsigned format with only exponents (no actual significand).
3934// No encodings for {zero, infinities or denorms}.
3935// NaN is represented by all 1's.
3936// Bias is 127.
3937void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3938 const uint64_t exponent_mask = 0xff;
3939 uint64_t val = api.getRawData()[0];
3940 uint64_t myexponent = (val & exponent_mask);
3941
3942 initialize(ourSemantics: &semFloat8E8M0FNU);
3943 assert(partCount() == 1);
3944
3945 // This format has unsigned representation only
3946 sign = 0;
3947
3948 // Set the significand
3949 // This format does not have any significand but the 'Pth' precision bit is
3950 // always set to 1 for consistency in APFloat's internal representation.
3951 uint64_t mysignificand = 1;
3952 significandParts()[0] = mysignificand;
3953
3954 // This format can either have a NaN or fcNormal
3955 // All 1's i.e. 255 is a NaN
3956 if (val == exponent_mask) {
3957 category = fcNaN;
3958 exponent = exponentNaN();
3959 return;
3960 }
3961 // Handle fcNormal...
3962 category = fcNormal;
3963 exponent = myexponent - 127; // 127 is bias
3964}
3965template <const fltSemantics &S>
3966void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3967 assert(api.getBitWidth() == S.sizeInBits);
3968 constexpr integerPart integer_bit = integerPart{1}
3969 << ((S.precision - 1) % integerPartWidth);
3970 constexpr uint64_t significand_mask = integer_bit - 1;
3971 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3972 constexpr unsigned int stored_significand_parts =
3973 partCountForBits(bits: trailing_significand_bits);
3974 constexpr unsigned int exponent_bits =
3975 S.sizeInBits - 1 - trailing_significand_bits;
3976 static_assert(exponent_bits < 64);
3977 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3978 constexpr int bias = -(S.minExponent - 1);
3979
3980 // Copy the bits of the significand. We need to clear out the exponent and
3981 // sign bit in the last word.
3982 std::array<integerPart, stored_significand_parts> mysignificand;
3983 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3984 if constexpr (significand_mask != 0) {
3985 mysignificand[mysignificand.size() - 1] &= significand_mask;
3986 }
3987
3988 // We assume the last word holds the sign bit, the exponent, and potentially
3989 // some of the trailing significand field.
3990 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3991 uint64_t myexponent =
3992 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3993
3994 initialize(ourSemantics: &S);
3995 assert(partCount() == mysignificand.size());
3996
3997 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3998
3999 bool all_zero_significand =
4000 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
4001
4002 bool is_zero = myexponent == 0 && all_zero_significand;
4003
4004 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
4005 if (myexponent - bias == ::exponentInf(semantics: S) && all_zero_significand) {
4006 makeInf(Neg: sign);
4007 return;
4008 }
4009 }
4010
4011 bool is_nan = false;
4012
4013 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
4014 is_nan = myexponent - bias == ::exponentNaN(semantics: S) && !all_zero_significand;
4015 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
4016 bool all_ones_significand =
4017 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
4018 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
4019 (!significand_mask ||
4020 mysignificand[mysignificand.size() - 1] == significand_mask);
4021 is_nan = myexponent - bias == ::exponentNaN(semantics: S) && all_ones_significand;
4022 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
4023 is_nan = is_zero && sign;
4024 }
4025
4026 if (is_nan) {
4027 category = fcNaN;
4028 exponent = ::exponentNaN(semantics: S);
4029 std::copy_n(mysignificand.begin(), mysignificand.size(),
4030 significandParts());
4031 return;
4032 }
4033
4034 if (is_zero) {
4035 makeZero(Neg: sign);
4036 return;
4037 }
4038
4039 category = fcNormal;
4040 exponent = myexponent - bias;
4041 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
4042 if (myexponent == 0) // denormal
4043 exponent = S.minExponent;
4044 else
4045 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
4046}
4047
4048void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
4049 initFromIEEEAPInt<semIEEEquad>(api);
4050}
4051
4052void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
4053 initFromIEEEAPInt<semIEEEdouble>(api);
4054}
4055
4056void IEEEFloat::initFromFloatAPInt(const APInt &api) {
4057 initFromIEEEAPInt<semIEEEsingle>(api);
4058}
4059
4060void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
4061 initFromIEEEAPInt<semBFloat>(api);
4062}
4063
4064void IEEEFloat::initFromHalfAPInt(const APInt &api) {
4065 initFromIEEEAPInt<semIEEEhalf>(api);
4066}
4067
4068void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
4069 initFromIEEEAPInt<semFloat8E5M2>(api);
4070}
4071
4072void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
4073 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
4074}
4075
4076void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
4077 initFromIEEEAPInt<semFloat8E4M3>(api);
4078}
4079
4080void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
4081 initFromIEEEAPInt<semFloat8E4M3FN>(api);
4082}
4083
4084void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
4085 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
4086}
4087
4088void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
4089 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
4090}
4091
4092void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
4093 initFromIEEEAPInt<semFloat8E3M4>(api);
4094}
4095
4096void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
4097 initFromIEEEAPInt<semFloatTF32>(api);
4098}
4099
4100void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
4101 initFromIEEEAPInt<semFloat6E3M2FN>(api);
4102}
4103
4104void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
4105 initFromIEEEAPInt<semFloat6E2M3FN>(api);
4106}
4107
4108void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
4109 initFromIEEEAPInt<semFloat4E2M1FN>(api);
4110}
4111
4112/// Treat api as containing the bits of a floating point number.
4113void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
4114 assert(api.getBitWidth() == Sem->sizeInBits);
4115 if (Sem == &semIEEEhalf)
4116 return initFromHalfAPInt(api);
4117 if (Sem == &semBFloat)
4118 return initFromBFloatAPInt(api);
4119 if (Sem == &semIEEEsingle)
4120 return initFromFloatAPInt(api);
4121 if (Sem == &semIEEEdouble)
4122 return initFromDoubleAPInt(api);
4123 if (Sem == &semX87DoubleExtended)
4124 return initFromF80LongDoubleAPInt(api);
4125 if (Sem == &semIEEEquad)
4126 return initFromQuadrupleAPInt(api);
4127 if (Sem == &semPPCDoubleDoubleLegacy)
4128 return initFromPPCDoubleDoubleLegacyAPInt(api);
4129 if (Sem == &semFloat8E5M2)
4130 return initFromFloat8E5M2APInt(api);
4131 if (Sem == &semFloat8E5M2FNUZ)
4132 return initFromFloat8E5M2FNUZAPInt(api);
4133 if (Sem == &semFloat8E4M3)
4134 return initFromFloat8E4M3APInt(api);
4135 if (Sem == &semFloat8E4M3FN)
4136 return initFromFloat8E4M3FNAPInt(api);
4137 if (Sem == &semFloat8E4M3FNUZ)
4138 return initFromFloat8E4M3FNUZAPInt(api);
4139 if (Sem == &semFloat8E4M3B11FNUZ)
4140 return initFromFloat8E4M3B11FNUZAPInt(api);
4141 if (Sem == &semFloat8E3M4)
4142 return initFromFloat8E3M4APInt(api);
4143 if (Sem == &semFloatTF32)
4144 return initFromFloatTF32APInt(api);
4145 if (Sem == &semFloat8E8M0FNU)
4146 return initFromFloat8E8M0FNUAPInt(api);
4147 if (Sem == &semFloat6E3M2FN)
4148 return initFromFloat6E3M2FNAPInt(api);
4149 if (Sem == &semFloat6E2M3FN)
4150 return initFromFloat6E2M3FNAPInt(api);
4151 if (Sem == &semFloat4E2M1FN)
4152 return initFromFloat4E2M1FNAPInt(api);
4153
4154 llvm_unreachable("unsupported semantics");
4155}
4156
4157/// Make this number the largest magnitude normal number in the given
4158/// semantics.
4159void IEEEFloat::makeLargest(bool Negative) {
4160 if (Negative && !semantics->hasSignedRepr)
4161 llvm_unreachable(
4162 "This floating point format does not support signed values");
4163 // We want (in interchange format):
4164 // sign = {Negative}
4165 // exponent = 1..10
4166 // significand = 1..1
4167 category = fcNormal;
4168 sign = Negative;
4169 exponent = semantics->maxExponent;
4170
4171 // Use memset to set all but the highest integerPart to all ones.
4172 integerPart *significand = significandParts();
4173 unsigned PartCount = partCount();
4174 memset(s: significand, c: 0xFF, n: sizeof(integerPart)*(PartCount - 1));
4175
4176 // Set the high integerPart especially setting all unused top bits for
4177 // internal consistency.
4178 const unsigned NumUnusedHighBits =
4179 PartCount*integerPartWidth - semantics->precision;
4180 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4181 ? (~integerPart(0) >> NumUnusedHighBits)
4182 : 0;
4183 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4184 semantics->nanEncoding == fltNanEncoding::AllOnes &&
4185 (semantics->precision > 1))
4186 significand[0] &= ~integerPart(1);
4187}
4188
4189/// Make this number the smallest magnitude denormal number in the given
4190/// semantics.
4191void IEEEFloat::makeSmallest(bool Negative) {
4192 if (Negative && !semantics->hasSignedRepr)
4193 llvm_unreachable(
4194 "This floating point format does not support signed values");
4195 // We want (in interchange format):
4196 // sign = {Negative}
4197 // exponent = 0..0
4198 // significand = 0..01
4199 category = fcNormal;
4200 sign = Negative;
4201 exponent = semantics->minExponent;
4202 APInt::tcSet(significandParts(), 1, partCount());
4203}
4204
4205void IEEEFloat::makeSmallestNormalized(bool Negative) {
4206 if (Negative && !semantics->hasSignedRepr)
4207 llvm_unreachable(
4208 "This floating point format does not support signed values");
4209 // We want (in interchange format):
4210 // sign = {Negative}
4211 // exponent = 0..0
4212 // significand = 10..0
4213
4214 category = fcNormal;
4215 zeroSignificand();
4216 sign = Negative;
4217 exponent = semantics->minExponent;
4218 APInt::tcSetBit(significandParts(), bit: semantics->precision - 1);
4219}
4220
4221IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4222 initFromAPInt(Sem: &Sem, api: API);
4223}
4224
4225IEEEFloat::IEEEFloat(float f) {
4226 initFromAPInt(Sem: &semIEEEsingle, api: APInt::floatToBits(V: f));
4227}
4228
4229IEEEFloat::IEEEFloat(double d) {
4230 initFromAPInt(Sem: &semIEEEdouble, api: APInt::doubleToBits(V: d));
4231}
4232
4233namespace {
4234 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4235 Buffer.append(in_start: Str.begin(), in_end: Str.end());
4236 }
4237
4238 /// Removes data from the given significand until it is no more
4239 /// precise than is required for the desired precision.
4240 void AdjustToPrecision(APInt &significand,
4241 int &exp, unsigned FormatPrecision) {
4242 unsigned bits = significand.getActiveBits();
4243
4244 // 196/59 is a very slight overestimate of lg_2(10).
4245 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4246
4247 if (bits <= bitsRequired) return;
4248
4249 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4250 if (!tensRemovable) return;
4251
4252 exp += tensRemovable;
4253
4254 APInt divisor(significand.getBitWidth(), 1);
4255 APInt powten(significand.getBitWidth(), 10);
4256 while (true) {
4257 if (tensRemovable & 1)
4258 divisor *= powten;
4259 tensRemovable >>= 1;
4260 if (!tensRemovable) break;
4261 powten *= powten;
4262 }
4263
4264 significand = significand.udiv(RHS: divisor);
4265
4266 // Truncate the significand down to its active bit count.
4267 significand = significand.trunc(width: significand.getActiveBits());
4268 }
4269
4270
4271 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4272 int &exp, unsigned FormatPrecision) {
4273 unsigned N = buffer.size();
4274 if (N <= FormatPrecision) return;
4275
4276 // The most significant figures are the last ones in the buffer.
4277 unsigned FirstSignificant = N - FormatPrecision;
4278
4279 // Round.
4280 // FIXME: this probably shouldn't use 'round half up'.
4281
4282 // Rounding down is just a truncation, except we also want to drop
4283 // trailing zeros from the new result.
4284 if (buffer[FirstSignificant - 1] < '5') {
4285 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4286 FirstSignificant++;
4287
4288 exp += FirstSignificant;
4289 buffer.erase(CS: &buffer[0], CE: &buffer[FirstSignificant]);
4290 return;
4291 }
4292
4293 // Rounding up requires a decimal add-with-carry. If we continue
4294 // the carry, the newly-introduced zeros will just be truncated.
4295 for (unsigned I = FirstSignificant; I != N; ++I) {
4296 if (buffer[I] == '9') {
4297 FirstSignificant++;
4298 } else {
4299 buffer[I]++;
4300 break;
4301 }
4302 }
4303
4304 // If we carried through, we have exactly one digit of precision.
4305 if (FirstSignificant == N) {
4306 exp += FirstSignificant;
4307 buffer.clear();
4308 buffer.push_back(Elt: '1');
4309 return;
4310 }
4311
4312 exp += FirstSignificant;
4313 buffer.erase(CS: &buffer[0], CE: &buffer[FirstSignificant]);
4314 }
4315
4316 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4317 APInt significand, unsigned FormatPrecision,
4318 unsigned FormatMaxPadding, bool TruncateZero) {
4319 const int semanticsPrecision = significand.getBitWidth();
4320
4321 if (isNeg)
4322 Str.push_back(Elt: '-');
4323
4324 // Set FormatPrecision if zero. We want to do this before we
4325 // truncate trailing zeros, as those are part of the precision.
4326 if (!FormatPrecision) {
4327 // We use enough digits so the number can be round-tripped back to an
4328 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4329 // Accurately" by Steele and White.
4330 // FIXME: Using a formula based purely on the precision is conservative;
4331 // we can print fewer digits depending on the actual value being printed.
4332
4333 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4334 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4335 }
4336
4337 // Ignore trailing binary zeros.
4338 int trailingZeros = significand.countr_zero();
4339 exp += trailingZeros;
4340 significand.lshrInPlace(ShiftAmt: trailingZeros);
4341
4342 // Change the exponent from 2^e to 10^e.
4343 if (exp == 0) {
4344 // Nothing to do.
4345 } else if (exp > 0) {
4346 // Just shift left.
4347 significand = significand.zext(width: semanticsPrecision + exp);
4348 significand <<= exp;
4349 exp = 0;
4350 } else { /* exp < 0 */
4351 int texp = -exp;
4352
4353 // We transform this using the identity:
4354 // (N)(2^-e) == (N)(5^e)(10^-e)
4355 // This means we have to multiply N (the significand) by 5^e.
4356 // To avoid overflow, we have to operate on numbers large
4357 // enough to store N * 5^e:
4358 // log2(N * 5^e) == log2(N) + e * log2(5)
4359 // <= semantics->precision + e * 137 / 59
4360 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4361
4362 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4363
4364 // Multiply significand by 5^e.
4365 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4366 significand = significand.zext(width: precision);
4367 APInt five_to_the_i(precision, 5);
4368 while (true) {
4369 if (texp & 1)
4370 significand *= five_to_the_i;
4371
4372 texp >>= 1;
4373 if (!texp)
4374 break;
4375 five_to_the_i *= five_to_the_i;
4376 }
4377 }
4378
4379 AdjustToPrecision(significand, exp, FormatPrecision);
4380
4381 SmallVector<char, 256> buffer;
4382
4383 // Fill the buffer.
4384 unsigned precision = significand.getBitWidth();
4385 if (precision < 4) {
4386 // We need enough precision to store the value 10.
4387 precision = 4;
4388 significand = significand.zext(width: precision);
4389 }
4390 APInt ten(precision, 10);
4391 APInt digit(precision, 0);
4392
4393 bool inTrail = true;
4394 while (significand != 0) {
4395 // digit <- significand % 10
4396 // significand <- significand / 10
4397 APInt::udivrem(LHS: significand, RHS: ten, Quotient&: significand, Remainder&: digit);
4398
4399 unsigned d = digit.getZExtValue();
4400
4401 // Drop trailing zeros.
4402 if (inTrail && !d)
4403 exp++;
4404 else {
4405 buffer.push_back(Elt: (char) ('0' + d));
4406 inTrail = false;
4407 }
4408 }
4409
4410 assert(!buffer.empty() && "no characters in buffer!");
4411
4412 // Drop down to FormatPrecision.
4413 // TODO: don't do more precise calculations above than are required.
4414 AdjustToPrecision(buffer, exp, FormatPrecision);
4415
4416 unsigned NDigits = buffer.size();
4417
4418 // Check whether we should use scientific notation.
4419 bool FormatScientific;
4420 if (!FormatMaxPadding)
4421 FormatScientific = true;
4422 else {
4423 if (exp >= 0) {
4424 // 765e3 --> 765000
4425 // ^^^
4426 // But we shouldn't make the number look more precise than it is.
4427 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4428 NDigits + (unsigned) exp > FormatPrecision);
4429 } else {
4430 // Power of the most significant digit.
4431 int MSD = exp + (int) (NDigits - 1);
4432 if (MSD >= 0) {
4433 // 765e-2 == 7.65
4434 FormatScientific = false;
4435 } else {
4436 // 765e-5 == 0.00765
4437 // ^ ^^
4438 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4439 }
4440 }
4441 }
4442
4443 // Scientific formatting is pretty straightforward.
4444 if (FormatScientific) {
4445 exp += (NDigits - 1);
4446
4447 Str.push_back(Elt: buffer[NDigits-1]);
4448 Str.push_back(Elt: '.');
4449 if (NDigits == 1 && TruncateZero)
4450 Str.push_back(Elt: '0');
4451 else
4452 for (unsigned I = 1; I != NDigits; ++I)
4453 Str.push_back(Elt: buffer[NDigits-1-I]);
4454 // Fill with zeros up to FormatPrecision.
4455 if (!TruncateZero && FormatPrecision > NDigits - 1)
4456 Str.append(NumInputs: FormatPrecision - NDigits + 1, Elt: '0');
4457 // For !TruncateZero we use lower 'e'.
4458 Str.push_back(Elt: TruncateZero ? 'E' : 'e');
4459
4460 Str.push_back(Elt: exp >= 0 ? '+' : '-');
4461 if (exp < 0)
4462 exp = -exp;
4463 SmallVector<char, 6> expbuf;
4464 do {
4465 expbuf.push_back(Elt: (char) ('0' + (exp % 10)));
4466 exp /= 10;
4467 } while (exp);
4468 // Exponent always at least two digits if we do not truncate zeros.
4469 if (!TruncateZero && expbuf.size() < 2)
4470 expbuf.push_back(Elt: '0');
4471 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4472 Str.push_back(Elt: expbuf[E-1-I]);
4473 return;
4474 }
4475
4476 // Non-scientific, positive exponents.
4477 if (exp >= 0) {
4478 for (unsigned I = 0; I != NDigits; ++I)
4479 Str.push_back(Elt: buffer[NDigits-1-I]);
4480 for (unsigned I = 0; I != (unsigned) exp; ++I)
4481 Str.push_back(Elt: '0');
4482 return;
4483 }
4484
4485 // Non-scientific, negative exponents.
4486
4487 // The number of digits to the left of the decimal point.
4488 int NWholeDigits = exp + (int) NDigits;
4489
4490 unsigned I = 0;
4491 if (NWholeDigits > 0) {
4492 for (; I != (unsigned) NWholeDigits; ++I)
4493 Str.push_back(Elt: buffer[NDigits-I-1]);
4494 Str.push_back(Elt: '.');
4495 } else {
4496 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4497
4498 Str.push_back(Elt: '0');
4499 Str.push_back(Elt: '.');
4500 for (unsigned Z = 1; Z != NZeros; ++Z)
4501 Str.push_back(Elt: '0');
4502 }
4503
4504 for (; I != NDigits; ++I)
4505 Str.push_back(Elt: buffer[NDigits-I-1]);
4506
4507 }
4508} // namespace
4509
4510void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4511 unsigned FormatMaxPadding, bool TruncateZero) const {
4512 switch (category) {
4513 case fcInfinity:
4514 if (isNegative())
4515 return append(Buffer&: Str, Str: "-Inf");
4516 else
4517 return append(Buffer&: Str, Str: "+Inf");
4518
4519 case fcNaN: return append(Buffer&: Str, Str: "NaN");
4520
4521 case fcZero:
4522 if (isNegative())
4523 Str.push_back(Elt: '-');
4524
4525 if (!FormatMaxPadding) {
4526 if (TruncateZero)
4527 append(Buffer&: Str, Str: "0.0E+0");
4528 else {
4529 append(Buffer&: Str, Str: "0.0");
4530 if (FormatPrecision > 1)
4531 Str.append(NumInputs: FormatPrecision - 1, Elt: '0');
4532 append(Buffer&: Str, Str: "e+00");
4533 }
4534 } else {
4535 Str.push_back(Elt: '0');
4536 }
4537 return;
4538
4539 case fcNormal:
4540 break;
4541 }
4542
4543 // Decompose the number into an APInt and an exponent.
4544 int exp = exponent - ((int) semantics->precision - 1);
4545 APInt significand(
4546 semantics->precision,
4547 ArrayRef(significandParts(), partCountForBits(bits: semantics->precision)));
4548
4549 toStringImpl(Str, isNeg: isNegative(), exp, significand, FormatPrecision,
4550 FormatMaxPadding, TruncateZero);
4551
4552}
4553
4554bool IEEEFloat::getExactInverse(APFloat *inv) const {
4555 // Special floats and denormals have no exact inverse.
4556 if (!isFiniteNonZero())
4557 return false;
4558
4559 // Check that the number is a power of two by making sure that only the
4560 // integer bit is set in the significand.
4561 if (significandLSB() != semantics->precision - 1)
4562 return false;
4563
4564 // Get the inverse.
4565 IEEEFloat reciprocal(*semantics, 1ULL);
4566 if (reciprocal.divide(rhs: *this, rounding_mode: rmNearestTiesToEven) != opOK)
4567 return false;
4568
4569 // Avoid multiplication with a denormal, it is not safe on all platforms and
4570 // may be slower than a normal division.
4571 if (reciprocal.isDenormal())
4572 return false;
4573
4574 assert(reciprocal.isFiniteNonZero() &&
4575 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4576
4577 if (inv)
4578 *inv = APFloat(reciprocal, *semantics);
4579
4580 return true;
4581}
4582
4583int IEEEFloat::getExactLog2Abs() const {
4584 if (!isFinite() || isZero())
4585 return INT_MIN;
4586
4587 const integerPart *Parts = significandParts();
4588 const int PartCount = partCountForBits(bits: semantics->precision);
4589
4590 int PopCount = 0;
4591 for (int i = 0; i < PartCount; ++i) {
4592 PopCount += llvm::popcount(Value: Parts[i]);
4593 if (PopCount > 1)
4594 return INT_MIN;
4595 }
4596
4597 if (exponent != semantics->minExponent)
4598 return exponent;
4599
4600 int CountrParts = 0;
4601 for (int i = 0; i < PartCount;
4602 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4603 if (Parts[i] != 0) {
4604 return exponent - semantics->precision + CountrParts +
4605 llvm::countr_zero(Val: Parts[i]) + 1;
4606 }
4607 }
4608
4609 llvm_unreachable("didn't find the set bit");
4610}
4611
4612bool IEEEFloat::isSignaling() const {
4613 if (!isNaN())
4614 return false;
4615 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4616 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4617 return false;
4618
4619 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4620 // first bit of the trailing significand being 0.
4621 return !APInt::tcExtractBit(significandParts(), bit: semantics->precision - 2);
4622}
4623
4624/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4625///
4626/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4627/// appropriate sign switching before/after the computation.
4628APFloat::opStatus IEEEFloat::next(bool nextDown) {
4629 // If we are performing nextDown, swap sign so we have -x.
4630 if (nextDown)
4631 changeSign();
4632
4633 // Compute nextUp(x)
4634 opStatus result = opOK;
4635
4636 // Handle each float category separately.
4637 switch (category) {
4638 case fcInfinity:
4639 // nextUp(+inf) = +inf
4640 if (!isNegative())
4641 break;
4642 // nextUp(-inf) = -getLargest()
4643 makeLargest(Negative: true);
4644 break;
4645 case fcNaN:
4646 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4647 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4648 // change the payload.
4649 if (isSignaling()) {
4650 result = opInvalidOp;
4651 // For consistency, propagate the sign of the sNaN to the qNaN.
4652 makeNaN(SNaN: false, Negative: isNegative(), fill: nullptr);
4653 }
4654 break;
4655 case fcZero:
4656 // nextUp(pm 0) = +getSmallest()
4657 makeSmallest(Negative: false);
4658 break;
4659 case fcNormal:
4660 // nextUp(-getSmallest()) = -0
4661 if (isSmallest() && isNegative()) {
4662 APInt::tcSet(significandParts(), 0, partCount());
4663 category = fcZero;
4664 exponent = 0;
4665 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4666 sign = false;
4667 if (!semantics->hasZero)
4668 makeSmallestNormalized(Negative: false);
4669 break;
4670 }
4671
4672 if (isLargest() && !isNegative()) {
4673 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4674 // nextUp(getLargest()) == NAN
4675 makeNaN();
4676 break;
4677 } else if (semantics->nonFiniteBehavior ==
4678 fltNonfiniteBehavior::FiniteOnly) {
4679 // nextUp(getLargest()) == getLargest()
4680 break;
4681 } else {
4682 // nextUp(getLargest()) == INFINITY
4683 APInt::tcSet(significandParts(), 0, partCount());
4684 category = fcInfinity;
4685 exponent = semantics->maxExponent + 1;
4686 break;
4687 }
4688 }
4689
4690 // nextUp(normal) == normal + inc.
4691 if (isNegative()) {
4692 // If we are negative, we need to decrement the significand.
4693
4694 // We only cross a binade boundary that requires adjusting the exponent
4695 // if:
4696 // 1. exponent != semantics->minExponent. This implies we are not in the
4697 // smallest binade or are dealing with denormals.
4698 // 2. Our significand excluding the integral bit is all zeros.
4699 bool WillCrossBinadeBoundary =
4700 exponent != semantics->minExponent && isSignificandAllZeros();
4701
4702 // Decrement the significand.
4703 //
4704 // We always do this since:
4705 // 1. If we are dealing with a non-binade decrement, by definition we
4706 // just decrement the significand.
4707 // 2. If we are dealing with a normal -> normal binade decrement, since
4708 // we have an explicit integral bit the fact that all bits but the
4709 // integral bit are zero implies that subtracting one will yield a
4710 // significand with 0 integral bit and 1 in all other spots. Thus we
4711 // must just adjust the exponent and set the integral bit to 1.
4712 // 3. If we are dealing with a normal -> denormal binade decrement,
4713 // since we set the integral bit to 0 when we represent denormals, we
4714 // just decrement the significand.
4715 integerPart *Parts = significandParts();
4716 APInt::tcDecrement(dst: Parts, parts: partCount());
4717
4718 if (WillCrossBinadeBoundary) {
4719 // Our result is a normal number. Do the following:
4720 // 1. Set the integral bit to 1.
4721 // 2. Decrement the exponent.
4722 APInt::tcSetBit(Parts, bit: semantics->precision - 1);
4723 exponent--;
4724 }
4725 } else {
4726 // If we are positive, we need to increment the significand.
4727
4728 // We only cross a binade boundary that requires adjusting the exponent if
4729 // the input is not a denormal and all of said input's significand bits
4730 // are set. If all of said conditions are true: clear the significand, set
4731 // the integral bit to 1, and increment the exponent. If we have a
4732 // denormal always increment since moving denormals and the numbers in the
4733 // smallest normal binade have the same exponent in our representation.
4734 // If there are only exponents, any increment always crosses the
4735 // BinadeBoundary.
4736 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(Sem: *semantics) ||
4737 (!isDenormal() && isSignificandAllOnes());
4738
4739 if (WillCrossBinadeBoundary) {
4740 integerPart *Parts = significandParts();
4741 APInt::tcSet(Parts, 0, partCount());
4742 APInt::tcSetBit(Parts, bit: semantics->precision - 1);
4743 assert(exponent != semantics->maxExponent &&
4744 "We can not increment an exponent beyond the maxExponent allowed"
4745 " by the given floating point semantics.");
4746 exponent++;
4747 } else {
4748 incrementSignificand();
4749 }
4750 }
4751 break;
4752 }
4753
4754 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4755 if (nextDown)
4756 changeSign();
4757
4758 return result;
4759}
4760
4761APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4762 return ::exponentNaN(semantics: *semantics);
4763}
4764
4765APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4766 return ::exponentInf(semantics: *semantics);
4767}
4768
4769APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4770 return ::exponentZero(semantics: *semantics);
4771}
4772
4773void IEEEFloat::makeInf(bool Negative) {
4774 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4775 llvm_unreachable("This floating point format does not support Inf");
4776
4777 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4778 // There is no Inf, so make NaN instead.
4779 makeNaN(SNaN: false, Negative);
4780 return;
4781 }
4782 category = fcInfinity;
4783 sign = Negative;
4784 exponent = exponentInf();
4785 APInt::tcSet(significandParts(), 0, partCount());
4786}
4787
4788void IEEEFloat::makeZero(bool Negative) {
4789 if (!semantics->hasZero)
4790 llvm_unreachable("This floating point format does not support Zero");
4791
4792 category = fcZero;
4793 sign = Negative;
4794 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4795 // Merge negative zero to positive because 0b10000...000 is used for NaN
4796 sign = false;
4797 }
4798 exponent = exponentZero();
4799 APInt::tcSet(significandParts(), 0, partCount());
4800}
4801
4802void IEEEFloat::makeQuiet() {
4803 assert(isNaN());
4804 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4805 APInt::tcSetBit(significandParts(), bit: semantics->precision - 2);
4806}
4807
4808int ilogb(const IEEEFloat &Arg) {
4809 if (Arg.isNaN())
4810 return APFloat::IEK_NaN;
4811 if (Arg.isZero())
4812 return APFloat::IEK_Zero;
4813 if (Arg.isInfinity())
4814 return APFloat::IEK_Inf;
4815 if (!Arg.isDenormal())
4816 return Arg.exponent;
4817
4818 IEEEFloat Normalized(Arg);
4819 int SignificandBits = Arg.getSemantics().precision - 1;
4820
4821 Normalized.exponent += SignificandBits;
4822 Normalized.normalize(rounding_mode: APFloat::rmNearestTiesToEven, lost_fraction: lfExactlyZero);
4823 return Normalized.exponent - SignificandBits;
4824}
4825
4826IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode RoundingMode) {
4827 auto MaxExp = X.getSemantics().maxExponent;
4828 auto MinExp = X.getSemantics().minExponent;
4829
4830 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4831 // overflow; clamp it to a safe range before adding, but ensure that the range
4832 // is large enough that the clamp does not change the result. The range we
4833 // need to support is the difference between the largest possible exponent and
4834 // the normalized exponent of half the smallest denormal.
4835
4836 int SignificandBits = X.getSemantics().precision - 1;
4837 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4838
4839 // Clamp to one past the range ends to let normalize handle overlflow.
4840 X.exponent += std::clamp(val: Exp, lo: -MaxIncrement - 1, hi: MaxIncrement);
4841 X.normalize(rounding_mode: RoundingMode, lost_fraction: lfExactlyZero);
4842 if (X.isNaN())
4843 X.makeQuiet();
4844 return X;
4845}
4846
4847IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4848 Exp = ilogb(Arg: Val);
4849
4850 // Quiet signalling nans.
4851 if (Exp == APFloat::IEK_NaN) {
4852 IEEEFloat Quiet(Val);
4853 Quiet.makeQuiet();
4854 return Quiet;
4855 }
4856
4857 if (Exp == APFloat::IEK_Inf)
4858 return Val;
4859
4860 // 1 is added because frexp is defined to return a normalized fraction in
4861 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4862 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4863 return scalbn(X: Val, Exp: -Exp, RoundingMode: RM);
4864}
4865
4866DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4867 : Semantics(&S),
4868 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4869 assert(Semantics == &semPPCDoubleDouble);
4870}
4871
4872DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4873 : Semantics(&S),
4874 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4875 APFloat(semIEEEdouble, uninitialized)}) {
4876 assert(Semantics == &semPPCDoubleDouble);
4877}
4878
4879DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4880 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4881 APFloat(semIEEEdouble)}) {
4882 assert(Semantics == &semPPCDoubleDouble);
4883}
4884
4885DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4886 : Semantics(&S),
4887 Floats(new APFloat[2]{
4888 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4889 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4890 assert(Semantics == &semPPCDoubleDouble);
4891}
4892
4893DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4894 APFloat &&Second)
4895 : Semantics(&S),
4896 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4897 assert(Semantics == &semPPCDoubleDouble);
4898 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4899 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4900}
4901
4902DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4903 : Semantics(RHS.Semantics),
4904 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4905 APFloat(RHS.Floats[1])}
4906 : nullptr) {
4907 assert(Semantics == &semPPCDoubleDouble);
4908}
4909
4910DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4911 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4912 RHS.Semantics = &semBogus;
4913 RHS.Floats = nullptr;
4914 assert(Semantics == &semPPCDoubleDouble);
4915}
4916
4917DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4918 if (Semantics == RHS.Semantics && RHS.Floats) {
4919 Floats[0] = RHS.Floats[0];
4920 Floats[1] = RHS.Floats[1];
4921 } else if (this != &RHS) {
4922 this->~DoubleAPFloat();
4923 new (this) DoubleAPFloat(RHS);
4924 }
4925 return *this;
4926}
4927
4928// Implement addition, subtraction, multiplication and division based on:
4929// "Software for Doubled-Precision Floating-Point Computations",
4930// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4931APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4932 const APFloat &c, const APFloat &cc,
4933 roundingMode RM) {
4934 int Status = opOK;
4935 APFloat z = a;
4936 Status |= z.add(RHS: c, RM);
4937 if (!z.isFinite()) {
4938 if (!z.isInfinity()) {
4939 Floats[0] = std::move(z);
4940 Floats[1].makeZero(/* Neg = */ false);
4941 return (opStatus)Status;
4942 }
4943 Status = opOK;
4944 auto AComparedToC = a.compareAbsoluteValue(RHS: c);
4945 z = cc;
4946 Status |= z.add(RHS: aa, RM);
4947 if (AComparedToC == APFloat::cmpGreaterThan) {
4948 // z = cc + aa + c + a;
4949 Status |= z.add(RHS: c, RM);
4950 Status |= z.add(RHS: a, RM);
4951 } else {
4952 // z = cc + aa + a + c;
4953 Status |= z.add(RHS: a, RM);
4954 Status |= z.add(RHS: c, RM);
4955 }
4956 if (!z.isFinite()) {
4957 Floats[0] = std::move(z);
4958 Floats[1].makeZero(/* Neg = */ false);
4959 return (opStatus)Status;
4960 }
4961 Floats[0] = z;
4962 APFloat zz = aa;
4963 Status |= zz.add(RHS: cc, RM);
4964 if (AComparedToC == APFloat::cmpGreaterThan) {
4965 // Floats[1] = a - z + c + zz;
4966 Floats[1] = a;
4967 Status |= Floats[1].subtract(RHS: z, RM);
4968 Status |= Floats[1].add(RHS: c, RM);
4969 Status |= Floats[1].add(RHS: zz, RM);
4970 } else {
4971 // Floats[1] = c - z + a + zz;
4972 Floats[1] = c;
4973 Status |= Floats[1].subtract(RHS: z, RM);
4974 Status |= Floats[1].add(RHS: a, RM);
4975 Status |= Floats[1].add(RHS: zz, RM);
4976 }
4977 } else {
4978 // q = a - z;
4979 APFloat q = a;
4980 Status |= q.subtract(RHS: z, RM);
4981
4982 // zz = q + c + (a - (q + z)) + aa + cc;
4983 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4984 auto zz = q;
4985 Status |= zz.add(RHS: c, RM);
4986 Status |= q.add(RHS: z, RM);
4987 Status |= q.subtract(RHS: a, RM);
4988 q.changeSign();
4989 Status |= zz.add(RHS: q, RM);
4990 Status |= zz.add(RHS: aa, RM);
4991 Status |= zz.add(RHS: cc, RM);
4992 if (zz.isZero() && !zz.isNegative()) {
4993 Floats[0] = std::move(z);
4994 Floats[1].makeZero(/* Neg = */ false);
4995 return opOK;
4996 }
4997 Floats[0] = z;
4998 Status |= Floats[0].add(RHS: zz, RM);
4999 if (!Floats[0].isFinite()) {
5000 Floats[1].makeZero(/* Neg = */ false);
5001 return (opStatus)Status;
5002 }
5003 Floats[1] = std::move(z);
5004 Status |= Floats[1].subtract(RHS: Floats[0], RM);
5005 Status |= Floats[1].add(RHS: zz, RM);
5006 }
5007 return (opStatus)Status;
5008}
5009
5010APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
5011 const DoubleAPFloat &RHS,
5012 DoubleAPFloat &Out,
5013 roundingMode RM) {
5014 if (LHS.getCategory() == fcNaN) {
5015 Out = LHS;
5016 return opOK;
5017 }
5018 if (RHS.getCategory() == fcNaN) {
5019 Out = RHS;
5020 return opOK;
5021 }
5022 if (LHS.getCategory() == fcZero) {
5023 Out = RHS;
5024 return opOK;
5025 }
5026 if (RHS.getCategory() == fcZero) {
5027 Out = LHS;
5028 return opOK;
5029 }
5030 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
5031 LHS.isNegative() != RHS.isNegative()) {
5032 Out.makeNaN(SNaN: false, Neg: Out.isNegative(), fill: nullptr);
5033 return opInvalidOp;
5034 }
5035 if (LHS.getCategory() == fcInfinity) {
5036 Out = LHS;
5037 return opOK;
5038 }
5039 if (RHS.getCategory() == fcInfinity) {
5040 Out = RHS;
5041 return opOK;
5042 }
5043 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
5044
5045 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
5046 CC(RHS.Floats[1]);
5047 assert(&A.getSemantics() == &semIEEEdouble);
5048 assert(&AA.getSemantics() == &semIEEEdouble);
5049 assert(&C.getSemantics() == &semIEEEdouble);
5050 assert(&CC.getSemantics() == &semIEEEdouble);
5051 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
5052 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
5053 return Out.addImpl(a: A, aa: AA, c: C, cc: CC, RM);
5054}
5055
5056APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
5057 roundingMode RM) {
5058 return addWithSpecial(LHS: *this, RHS, Out&: *this, RM);
5059}
5060
5061APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
5062 roundingMode RM) {
5063 changeSign();
5064 auto Ret = add(RHS, RM);
5065 changeSign();
5066 return Ret;
5067}
5068
5069APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
5070 APFloat::roundingMode RM) {
5071 const auto &LHS = *this;
5072 auto &Out = *this;
5073 /* Interesting observation: For special categories, finding the lowest
5074 common ancestor of the following layered graph gives the correct
5075 return category:
5076
5077 NaN
5078 / \
5079 Zero Inf
5080 \ /
5081 Normal
5082
5083 e.g. NaN * NaN = NaN
5084 Zero * Inf = NaN
5085 Normal * Zero = Zero
5086 Normal * Inf = Inf
5087 */
5088 if (LHS.getCategory() == fcNaN) {
5089 Out = LHS;
5090 return opOK;
5091 }
5092 if (RHS.getCategory() == fcNaN) {
5093 Out = RHS;
5094 return opOK;
5095 }
5096 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
5097 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
5098 Out.makeNaN(SNaN: false, Neg: false, fill: nullptr);
5099 return opOK;
5100 }
5101 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
5102 Out = LHS;
5103 return opOK;
5104 }
5105 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
5106 Out = RHS;
5107 return opOK;
5108 }
5109 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
5110 "Special cases not handled exhaustively");
5111
5112 int Status = opOK;
5113 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
5114 // t = a * c
5115 APFloat T = A;
5116 Status |= T.multiply(RHS: C, RM);
5117 if (!T.isFiniteNonZero()) {
5118 Floats[0] = T;
5119 Floats[1].makeZero(/* Neg = */ false);
5120 return (opStatus)Status;
5121 }
5122
5123 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
5124 APFloat Tau = A;
5125 T.changeSign();
5126 Status |= Tau.fusedMultiplyAdd(Multiplicand: C, Addend: T, RM);
5127 T.changeSign();
5128 {
5129 // v = a * d
5130 APFloat V = A;
5131 Status |= V.multiply(RHS: D, RM);
5132 // w = b * c
5133 APFloat W = B;
5134 Status |= W.multiply(RHS: C, RM);
5135 Status |= V.add(RHS: W, RM);
5136 // tau += v + w
5137 Status |= Tau.add(RHS: V, RM);
5138 }
5139 // u = t + tau
5140 APFloat U = T;
5141 Status |= U.add(RHS: Tau, RM);
5142
5143 Floats[0] = U;
5144 if (!U.isFinite()) {
5145 Floats[1].makeZero(/* Neg = */ false);
5146 } else {
5147 // Floats[1] = (t - u) + tau
5148 Status |= T.subtract(RHS: U, RM);
5149 Status |= T.add(RHS: Tau, RM);
5150 Floats[1] = T;
5151 }
5152 return (opStatus)Status;
5153}
5154
5155APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
5156 APFloat::roundingMode RM) {
5157 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5158 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5159 auto Ret =
5160 Tmp.divide(RHS: APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
5161 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5162 return Ret;
5163}
5164
5165APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
5166 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5167 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5168 auto Ret =
5169 Tmp.remainder(RHS: APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5170 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5171 return Ret;
5172}
5173
5174APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
5175 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5176 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5177 auto Ret = Tmp.mod(RHS: APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5178 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5179 return Ret;
5180}
5181
5182APFloat::opStatus
5183DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
5184 const DoubleAPFloat &Addend,
5185 APFloat::roundingMode RM) {
5186 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5187 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5188 auto Ret = Tmp.fusedMultiplyAdd(
5189 Multiplicand: APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
5190 Addend: APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
5191 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5192 return Ret;
5193}
5194
5195APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
5196 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5197 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5198 auto Ret = Tmp.roundToIntegral(RM);
5199 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5200 return Ret;
5201}
5202
5203void DoubleAPFloat::changeSign() {
5204 Floats[0].changeSign();
5205 Floats[1].changeSign();
5206}
5207
5208APFloat::cmpResult
5209DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
5210 auto Result = Floats[0].compareAbsoluteValue(RHS: RHS.Floats[0]);
5211 if (Result != cmpEqual)
5212 return Result;
5213 Result = Floats[1].compareAbsoluteValue(RHS: RHS.Floats[1]);
5214 if (Result == cmpLessThan || Result == cmpGreaterThan) {
5215 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
5216 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
5217 if (Against && !RHSAgainst)
5218 return cmpLessThan;
5219 if (!Against && RHSAgainst)
5220 return cmpGreaterThan;
5221 if (!Against && !RHSAgainst)
5222 return Result;
5223 if (Against && RHSAgainst)
5224 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
5225 }
5226 return Result;
5227}
5228
5229APFloat::fltCategory DoubleAPFloat::getCategory() const {
5230 return Floats[0].getCategory();
5231}
5232
5233bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5234
5235void DoubleAPFloat::makeInf(bool Neg) {
5236 Floats[0].makeInf(Neg);
5237 Floats[1].makeZero(/* Neg = */ false);
5238}
5239
5240void DoubleAPFloat::makeZero(bool Neg) {
5241 Floats[0].makeZero(Neg);
5242 Floats[1].makeZero(/* Neg = */ false);
5243}
5244
5245void DoubleAPFloat::makeLargest(bool Neg) {
5246 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5247 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5248 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5249 if (Neg)
5250 changeSign();
5251}
5252
5253void DoubleAPFloat::makeSmallest(bool Neg) {
5254 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5255 Floats[0].makeSmallest(Neg);
5256 Floats[1].makeZero(/* Neg = */ false);
5257}
5258
5259void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
5260 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5261 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5262 if (Neg)
5263 Floats[0].changeSign();
5264 Floats[1].makeZero(/* Neg = */ false);
5265}
5266
5267void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5268 Floats[0].makeNaN(SNaN, Neg, fill);
5269 Floats[1].makeZero(/* Neg = */ false);
5270}
5271
5272APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
5273 auto Result = Floats[0].compare(RHS: RHS.Floats[0]);
5274 // |Float[0]| > |Float[1]|
5275 if (Result == APFloat::cmpEqual)
5276 return Floats[1].compare(RHS: RHS.Floats[1]);
5277 return Result;
5278}
5279
5280bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
5281 return Floats[0].bitwiseIsEqual(RHS: RHS.Floats[0]) &&
5282 Floats[1].bitwiseIsEqual(RHS: RHS.Floats[1]);
5283}
5284
5285hash_code hash_value(const DoubleAPFloat &Arg) {
5286 if (Arg.Floats)
5287 return hash_combine(args: hash_value(Arg: Arg.Floats[0]), args: hash_value(Arg: Arg.Floats[1]));
5288 return hash_combine(args: Arg.Semantics);
5289}
5290
5291APInt DoubleAPFloat::bitcastToAPInt() const {
5292 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5293 uint64_t Data[] = {
5294 Floats[0].bitcastToAPInt().getRawData()[0],
5295 Floats[1].bitcastToAPInt().getRawData()[0],
5296 };
5297 return APInt(128, 2, Data);
5298}
5299
5300Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
5301 roundingMode RM) {
5302 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5303 APFloat Tmp(semPPCDoubleDoubleLegacy);
5304 auto Ret = Tmp.convertFromString(S, RM);
5305 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5306 return Ret;
5307}
5308
5309APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5310 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5311 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5312 auto Ret = Tmp.next(nextDown);
5313 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5314 return Ret;
5315}
5316
5317APFloat::opStatus
5318DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5319 unsigned int Width, bool IsSigned,
5320 roundingMode RM, bool *IsExact) const {
5321 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5322 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5323 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5324}
5325
5326APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5327 bool IsSigned,
5328 roundingMode RM) {
5329 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5330 APFloat Tmp(semPPCDoubleDoubleLegacy);
5331 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5332 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5333 return Ret;
5334}
5335
5336APFloat::opStatus
5337DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5338 unsigned int InputSize,
5339 bool IsSigned, roundingMode RM) {
5340 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5341 APFloat Tmp(semPPCDoubleDoubleLegacy);
5342 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5343 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5344 return Ret;
5345}
5346
5347APFloat::opStatus
5348DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5349 unsigned int InputSize,
5350 bool IsSigned, roundingMode RM) {
5351 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5352 APFloat Tmp(semPPCDoubleDoubleLegacy);
5353 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5354 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5355 return Ret;
5356}
5357
5358unsigned int DoubleAPFloat::convertToHexString(char *DST,
5359 unsigned int HexDigits,
5360 bool UpperCase,
5361 roundingMode RM) const {
5362 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5363 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5364 .convertToHexString(DST, HexDigits, UpperCase, RM);
5365}
5366
5367bool DoubleAPFloat::isDenormal() const {
5368 return getCategory() == fcNormal &&
5369 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5370 // (double)(Hi + Lo) == Hi defines a normal number.
5371 Floats[0] != Floats[0] + Floats[1]);
5372}
5373
5374bool DoubleAPFloat::isSmallest() const {
5375 if (getCategory() != fcNormal)
5376 return false;
5377 DoubleAPFloat Tmp(*this);
5378 Tmp.makeSmallest(Neg: this->isNegative());
5379 return Tmp.compare(RHS: *this) == cmpEqual;
5380}
5381
5382bool DoubleAPFloat::isSmallestNormalized() const {
5383 if (getCategory() != fcNormal)
5384 return false;
5385
5386 DoubleAPFloat Tmp(*this);
5387 Tmp.makeSmallestNormalized(Neg: this->isNegative());
5388 return Tmp.compare(RHS: *this) == cmpEqual;
5389}
5390
5391bool DoubleAPFloat::isLargest() const {
5392 if (getCategory() != fcNormal)
5393 return false;
5394 DoubleAPFloat Tmp(*this);
5395 Tmp.makeLargest(Neg: this->isNegative());
5396 return Tmp.compare(RHS: *this) == cmpEqual;
5397}
5398
5399bool DoubleAPFloat::isInteger() const {
5400 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5401 return Floats[0].isInteger() && Floats[1].isInteger();
5402}
5403
5404void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5405 unsigned FormatPrecision,
5406 unsigned FormatMaxPadding,
5407 bool TruncateZero) const {
5408 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5409 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5410 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5411}
5412
5413bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5414 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5415 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5416 if (!inv)
5417 return Tmp.getExactInverse(inv: nullptr);
5418 APFloat Inv(semPPCDoubleDoubleLegacy);
5419 auto Ret = Tmp.getExactInverse(inv: &Inv);
5420 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5421 return Ret;
5422}
5423
5424int DoubleAPFloat::getExactLog2() const {
5425 // TODO: Implement me
5426 return INT_MIN;
5427}
5428
5429int DoubleAPFloat::getExactLog2Abs() const {
5430 // TODO: Implement me
5431 return INT_MIN;
5432}
5433
5434DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5435 APFloat::roundingMode RM) {
5436 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5437 return DoubleAPFloat(semPPCDoubleDouble, scalbn(X: Arg.Floats[0], Exp, RM),
5438 scalbn(X: Arg.Floats[1], Exp, RM));
5439}
5440
5441DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5442 APFloat::roundingMode RM) {
5443 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5444 APFloat First = frexp(X: Arg.Floats[0], Exp, RM);
5445 APFloat Second = Arg.Floats[1];
5446 if (Arg.getCategory() == APFloat::fcNormal)
5447 Second = scalbn(X: Second, Exp: -Exp, RM);
5448 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5449}
5450
5451} // namespace detail
5452
5453APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5454 if (usesLayout<IEEEFloat>(Semantics)) {
5455 new (&IEEE) IEEEFloat(std::move(F));
5456 return;
5457 }
5458 if (usesLayout<DoubleAPFloat>(Semantics)) {
5459 const fltSemantics& S = F.getSemantics();
5460 new (&Double)
5461 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5462 APFloat(semIEEEdouble));
5463 return;
5464 }
5465 llvm_unreachable("Unexpected semantics");
5466}
5467
5468Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5469 roundingMode RM) {
5470 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5471}
5472
5473hash_code hash_value(const APFloat &Arg) {
5474 if (APFloat::usesLayout<detail::IEEEFloat>(Semantics: Arg.getSemantics()))
5475 return hash_value(Arg: Arg.U.IEEE);
5476 if (APFloat::usesLayout<detail::DoubleAPFloat>(Semantics: Arg.getSemantics()))
5477 return hash_value(Arg: Arg.U.Double);
5478 llvm_unreachable("Unexpected semantics");
5479}
5480
5481APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5482 : APFloat(Semantics) {
5483 auto StatusOrErr = convertFromString(Str: S, RM: rmNearestTiesToEven);
5484 assert(StatusOrErr && "Invalid floating point representation");
5485 consumeError(Err: StatusOrErr.takeError());
5486}
5487
5488FPClassTest APFloat::classify() const {
5489 if (isZero())
5490 return isNegative() ? fcNegZero : fcPosZero;
5491 if (isNormal())
5492 return isNegative() ? fcNegNormal : fcPosNormal;
5493 if (isDenormal())
5494 return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5495 if (isInfinity())
5496 return isNegative() ? fcNegInf : fcPosInf;
5497 assert(isNaN() && "Other class of FP constant");
5498 return isSignaling() ? fcSNan : fcQNan;
5499}
5500
5501APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5502 roundingMode RM, bool *losesInfo) {
5503 if (&getSemantics() == &ToSemantics) {
5504 *losesInfo = false;
5505 return opOK;
5506 }
5507 if (usesLayout<IEEEFloat>(Semantics: getSemantics()) &&
5508 usesLayout<IEEEFloat>(Semantics: ToSemantics))
5509 return U.IEEE.convert(toSemantics: ToSemantics, rounding_mode: RM, losesInfo);
5510 if (usesLayout<IEEEFloat>(Semantics: getSemantics()) &&
5511 usesLayout<DoubleAPFloat>(Semantics: ToSemantics)) {
5512 assert(&ToSemantics == &semPPCDoubleDouble);
5513 auto Ret = U.IEEE.convert(toSemantics: semPPCDoubleDoubleLegacy, rounding_mode: RM, losesInfo);
5514 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5515 return Ret;
5516 }
5517 if (usesLayout<DoubleAPFloat>(Semantics: getSemantics()) &&
5518 usesLayout<IEEEFloat>(Semantics: ToSemantics)) {
5519 auto Ret = getIEEE().convert(toSemantics: ToSemantics, rounding_mode: RM, losesInfo);
5520 *this = APFloat(std::move(getIEEE()), ToSemantics);
5521 return Ret;
5522 }
5523 llvm_unreachable("Unexpected semantics");
5524}
5525
5526APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5527 return APFloat(Semantics, APInt::getAllOnes(numBits: Semantics.sizeInBits));
5528}
5529
5530void APFloat::print(raw_ostream &OS) const {
5531 SmallVector<char, 16> Buffer;
5532 toString(Str&: Buffer);
5533 OS << Buffer;
5534}
5535
5536#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5537LLVM_DUMP_METHOD void APFloat::dump() const {
5538 print(dbgs());
5539 dbgs() << '\n';
5540}
5541#endif
5542
5543void APFloat::Profile(FoldingSetNodeID &NID) const {
5544 NID.Add(x: bitcastToAPInt());
5545}
5546
5547/* Same as convertToInteger(integerPart*, ...), except the result is returned in
5548 an APSInt, whose initial bit-width and signed-ness are used to determine the
5549 precision of the conversion.
5550 */
5551APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5552 roundingMode rounding_mode,
5553 bool *isExact) const {
5554 unsigned bitWidth = result.getBitWidth();
5555 SmallVector<uint64_t, 4> parts(result.getNumWords());
5556 opStatus status = convertToInteger(Input: parts, Width: bitWidth, IsSigned: result.isSigned(),
5557 RM: rounding_mode, IsExact: isExact);
5558 // Keeps the original signed-ness.
5559 result = APInt(bitWidth, parts);
5560 return status;
5561}
5562
5563double APFloat::convertToDouble() const {
5564 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5565 return getIEEE().convertToDouble();
5566 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
5567 "Float semantics is not representable by IEEEdouble");
5568 APFloat Temp = *this;
5569 bool LosesInfo;
5570 opStatus St = Temp.convert(ToSemantics: semIEEEdouble, RM: rmNearestTiesToEven, losesInfo: &LosesInfo);
5571 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5572 (void)St;
5573 return Temp.getIEEE().convertToDouble();
5574}
5575
#ifdef HAS_IEE754_FLOAT128
/// Returns this value as a host float128.
///
/// IEEEquad values are converted directly. Any other semantics must be
/// representable by IEEEquad (asserted); a copy is converted to IEEEquad
/// first, and that conversion is asserted to be exact.
float128 APFloat::convertToQuad() const {
  const fltSemantics *const QuadSem =
      static_cast<const fltSemantics *>(&semIEEEquad);
  if (&getSemantics() == QuadSem)
    return getIEEE().convertToQuad();

  assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");
  APFloat Converted = *this;
  bool LosesInfo;
  const opStatus St =
      Converted.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
  assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
  (void)St; // Only read by the assertion above.
  return Converted.getIEEE().convertToQuad();
}
#endif
5590
5591float APFloat::convertToFloat() const {
5592 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5593 return getIEEE().convertToFloat();
5594 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
5595 "Float semantics is not representable by IEEEsingle");
5596 APFloat Temp = *this;
5597 bool LosesInfo;
5598 opStatus St = Temp.convert(ToSemantics: semIEEEsingle, RM: rmNearestTiesToEven, losesInfo: &LosesInfo);
5599 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5600 (void)St;
5601 return Temp.getIEEE().convertToFloat();
5602}
5603
5604} // namespace llvm
5605
5606#undef APFLOAT_DISPATCH_ON_SEMANTICS
5607