APInt.cpp source code [llvm_projects/llvm/lib/Support/APInt.cpp]

1	//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements a class to represent arbitrary precision integer
10	// constant values and provide a variety of arithmetic operations on them.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "llvm/ADT/APInt.h"
15	#include "llvm/ADT/ArrayRef.h"
16	#include "llvm/ADT/FoldingSet.h"
17	#include "llvm/ADT/Hashing.h"
18	#include "llvm/ADT/SmallString.h"
19	#include "llvm/ADT/StringRef.h"
20	#include "llvm/ADT/bit.h"
21	#include "llvm/Config/llvm-config.h"
22	#include "llvm/Support/Alignment.h"
23	#include "llvm/Support/Debug.h"
24	#include "llvm/Support/ErrorHandling.h"
25	#include "llvm/Support/MathExtras.h"
26	#include "llvm/Support/raw_ostream.h"
27	#include <cmath>
28	#include <optional>
29
30	using namespace llvm;
31
32	#define DEBUG_TYPE "apint"
33
/// Allocate an array of \p numWords 64-bit words and zero-fill it.
///
/// No explicit failure check is made here; allocation failure is reported
/// however operator new[] reports it. The array is released elsewhere in this
/// file with delete[].
inline static uint64_t *getClearedMemory(unsigned numWords) {
  uint64_t *result = new uint64_t[numWords];
  memset(result, 0, numWords * sizeof(uint64_t));
  return result;
}
41
/// Allocate an array of \p numWords 64-bit words without initializing it.
///
/// The contents are indeterminate until the caller writes them.
inline static uint64_t *getMemory(unsigned numWords) {
  uint64_t *words = new uint64_t[numWords];
  return words;
}
47
/// Convert the character \p cdigit to its numeric value in base \p radix.
///
/// For radix 16 and 36 both upper- and lower-case letters are accepted.
/// \returns the digit value, or UINT_MAX when \p cdigit is not a valid digit
/// for \p radix.
inline static unsigned getDigit(char cdigit, uint8_t radix) {
  // All range checks rely on unsigned wrap-around: a character below the
  // base character becomes a huge value and fails the upper-bound test.
  if (radix == 16 || radix == 36) {
    const unsigned decimal = cdigit - '0';
    if (decimal <= 9)
      return decimal;

    // Highest letter offset allowed: 5 ('F') for hex, 25 ('Z') for base 36.
    const unsigned maxLetter = radix - 11U;

    const unsigned upper = cdigit - 'A';
    if (upper <= maxLetter)
      return upper + 10;

    const unsigned lower = cdigit - 'a';
    if (lower <= maxLetter)
      return lower + 10;

    // Neither a decimal digit nor an accepted letter.
    return UINT_MAX;
  }

  const unsigned value = cdigit - '0';
  return value < radix ? value : UINT_MAX;
}
74
75
76	void APInt::initSlowCase(uint64_t val, bool isSigned) {
77	U.pVal = getClearedMemory(numWords: getNumWords());
78	U.pVal[`0`] = val;
79	if (isSigned && int64_t(val) < `0`)
80	for (unsigned i = `1`; i < getNumWords(); ++i)
81	U.pVal[i] = WORDTYPE_MAX;
82	clearUnusedBits();
83	}
84
85	void APInt::initSlowCase(const APInt& that) {
86	U.pVal = getMemory(numWords: getNumWords());
87	memcpy(dest: U.pVal, src: that.U.pVal, n: getNumWords() * APINT_WORD_SIZE);
88	}
89
90	void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
91	assert(bigVal.data() && "Null pointer detected!");
92	if (isSingleWord())
93	U.VAL = bigVal [`0`];
94	else {
95	// Get memory, cleared to 0
96	U.pVal = getClearedMemory(numWords: getNumWords());
97	// Calculate the number of words to copy
98	unsigned words = std::min<unsigned>(a: bigVal.size(), b: getNumWords());
99	// Copy the words from bigVal to pVal
100	memcpy(dest: U.pVal, src: bigVal.data(), n: words * APINT_WORD_SIZE);
101	}
102	// Make sure unused high bits are cleared
103	clearUnusedBits();
104	}
105
106	APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) : BitWidth(numBits) {
107	initFromArray(bigVal);
108	}
109
110	APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
111	: BitWidth(numBits) {
112	initFromArray(bigVal: ArrayRef(bigVal, numWords));
113	}
114
115	APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
116	: BitWidth(numbits) {
117	fromString(numBits: numbits, str: Str, radix);
118	}
119
120	void APInt::reallocate(unsigned NewBitWidth) {
121	// If the number of words is the same we can just change the width and stop.
122	if (getNumWords() == getNumWords(BitWidth: NewBitWidth)) {
123	BitWidth = NewBitWidth;
124	return;
125	}
126
127	// If we have an allocation, delete it.
128	if (!isSingleWord())
129	delete [] U.pVal;
130
131	// Update BitWidth.
132	BitWidth = NewBitWidth;
133
134	// If we are supposed to have an allocation, create it.
135	if (!isSingleWord())
136	U.pVal = getMemory(numWords: getNumWords());
137	}
138
139	void APInt::assignSlowCase(const APInt &RHS) {
140	// Don't do anything for X = X
141	if (this == &RHS)
142	return;
143
144	// Adjust the bit width and handle allocations as necessary.
145	reallocate(NewBitWidth: RHS.getBitWidth());
146
147	// Copy the data.
148	if (isSingleWord())
149	U.VAL = RHS.U.VAL;
150	else
151	memcpy(dest: U.pVal, src: RHS.U.pVal, n: getNumWords() * APINT_WORD_SIZE);
152	}
153
154	/// This method 'profiles' an APInt for use with FoldingSet.
155	void APInt::Profile(FoldingSetNodeID& ID) const {
156	ID.AddInteger(I: BitWidth);
157
158	if (isSingleWord()) {
159	ID.AddInteger(I: U.VAL);
160	return;
161	}
162
163	unsigned NumWords = getNumWords();
164	for (unsigned i = `0`; i < NumWords; ++i)
165	ID.AddInteger(I: U.pVal[i]);
166	}
167
168	bool APInt::isAligned(Align A) const {
169	if (isZero())
170	return true;
171	const unsigned TrailingZeroes = countr_zero();
172	const unsigned MinimumTrailingZeroes = Log2(A);
173	return TrailingZeroes >= MinimumTrailingZeroes;
174	}
175
176	/// Prefix increment operator. Increments the APInt by one.
177	APInt& APInt::operator++() {
178	if (isSingleWord())
179	++U.VAL;
180	else
181	tcIncrement(dst: U.pVal, parts: getNumWords());
182	return clearUnusedBits();
183	}
184
185	/// Prefix decrement operator. Decrements the APInt by one.
186	APInt& APInt::operator--() {
187	if (isSingleWord())
188	--U.VAL;
189	else
190	tcDecrement(dst: U.pVal, parts: getNumWords());
191	return clearUnusedBits();
192	}
193
194	/// Adds the RHS APInt to this APInt.
195	/// @returns this, after addition of RHS.
196	/// Addition assignment operator.
197	APInt& APInt::operator+=(const APInt& RHS) {
198	assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
199	if (isSingleWord())
200	U.VAL += RHS.U.VAL;
201	else
202	tcAdd(U.pVal, RHS.U.pVal, carry: `0`, getNumWords());
203	return clearUnusedBits();
204	}
205
206	APInt& APInt::operator+=(uint64_t RHS) {
207	if (isSingleWord())
208	U.VAL += RHS;
209	else
210	tcAddPart(U.pVal, RHS, getNumWords());
211	return clearUnusedBits();
212	}
213
214	/// Subtracts the RHS APInt from this APInt
215	/// @returns this, after subtraction
216	/// Subtraction assignment operator.
217	APInt& APInt::operator-=(const APInt& RHS) {
218	assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
219	if (isSingleWord())
220	U.VAL -= RHS.U.VAL;
221	else
222	tcSubtract(U.pVal, RHS.U.pVal, carry: `0`, getNumWords());
223	return clearUnusedBits();
224	}
225
226	APInt& APInt::operator-=(uint64_t RHS) {
227	if (isSingleWord())
228	U.VAL -= RHS;
229	else
230	tcSubtractPart(U.pVal, RHS, getNumWords());
231	return clearUnusedBits();
232	}
233
234	APInt APInt::operator(const* APInt& RHS) const {
235	assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
236	if (isSingleWord())
237	return APInt (BitWidth, U.VAL * RHS.U.VAL);
238
239	APInt Result(getMemory(numWords: getNumWords()), getBitWidth());
240	tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
241	Result.clearUnusedBits();
242	return Result;
243	}
244
245	void APInt::andAssignSlowCase(const APInt &RHS) {
246	WordType dst = U.pVal, rhs = RHS.U.pVal;
247	for (size_t i = `0`, e = getNumWords(); i != e; ++i)
248	dst[i] &= rhs[i];
249	}
250
251	void APInt::orAssignSlowCase(const APInt &RHS) {
252	WordType dst = U.pVal, rhs = RHS.U.pVal;
253	for (size_t i = `0`, e = getNumWords(); i != e; ++i)
254	dst[i] \|= rhs[i];
255	}
256
257	void APInt::xorAssignSlowCase(const APInt &RHS) {
258	WordType dst = U.pVal, rhs = RHS.U.pVal;
259	for (size_t i = `0`, e = getNumWords(); i != e; ++i)
260	dst[i] ^= rhs[i];
261	}
262
263	APInt &APInt::operator=(const* APInt &RHS) {
264	*this = *this * RHS;
265	return *this;
266	}
267
268	APInt& APInt::operator*=(uint64_t RHS) {
269	if (isSingleWord()) {
270	U.VAL *= RHS;
271	} else {
272	unsigned NumWords = getNumWords();
273	tcMultiplyPart(dst: U.pVal, src: U.pVal, multiplier: RHS, carry: `0`, srcParts: NumWords, dstParts: NumWords, add: false);
274	}
275	return clearUnusedBits();
276	}
277
278	bool APInt::equalSlowCase(const APInt &RHS) const {
279	return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
280	}
281
282	int APInt::compare(const APInt& RHS) const {
283	assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
284	if (isSingleWord())
285	return U.VAL < RHS.U.VAL ? -`1` : U.VAL > RHS.U.VAL;
286
287	return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
288	}
289
290	int APInt::compareSigned(const APInt& RHS) const {
291	assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
292	if (isSingleWord()) {
293	int64_t lhsSext = SignExtend64(X: U.VAL, B: BitWidth);
294	int64_t rhsSext = SignExtend64(X: RHS.U.VAL, B: BitWidth);
295	return lhsSext < rhsSext ? -`1` : lhsSext > rhsSext;
296	}
297
298	bool lhsNeg = isNegative();
299	bool rhsNeg = RHS.isNegative();
300
301	// If the sign bits don't match, then (LHS < RHS) if LHS is negative
302	if (lhsNeg != rhsNeg)
303	return lhsNeg ? -`1` : `1`;
304
305	// Otherwise we can just use an unsigned comparison, because even negative
306	// numbers compare correctly this way if both have the same signed-ness.
307	return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
308	}
309
310	void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
311	unsigned loWord = whichWord(bitPosition: loBit);
312	unsigned hiWord = whichWord(bitPosition: hiBit);
313
314	// Create an initial mask for the low word with zeros below loBit.
315	uint64_t loMask = WORDTYPE_MAX << whichBit(bitPosition: loBit);
316
317	// If hiBit is not aligned, we need a high mask.
318	unsigned hiShiftAmt = whichBit(bitPosition: hiBit);
319	if (hiShiftAmt != `0`) {
320	// Create a high mask with zeros above hiBit.
321	uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
322	// If loWord and hiWord are equal, then we combine the masks. Otherwise,
323	// set the bits in hiWord.
324	if (hiWord == loWord)
325	loMask &= hiMask;
326	else
327	U.pVal[hiWord] \|= hiMask;
328	}
329	// Apply the mask to the low word.
330	U.pVal[loWord] \|= loMask;
331
332	// Fill any words between loWord and hiWord with all ones.
333	for (unsigned word = loWord + `1`; word < hiWord; ++word)
334	U.pVal[word] = WORDTYPE_MAX;
335	}
336
337	// Complement a bignum in-place.
338	static void tcComplement(APInt::WordType dst, unsigned* parts) {
339	for (unsigned i = `0`; i < parts; i++)
340	dst[i] = ~dst[i];
341	}
342
343	/// Toggle every bit to its opposite value.
344	void APInt::flipAllBitsSlowCase() {
345	tcComplement(dst: U.pVal, parts: getNumWords());
346	clearUnusedBits();
347	}
348
349	/// Concatenate the bits from "NewLSB" onto the bottom of this. This is*
350	/// equivalent to:
351	/// (this->zext(NewWidth) << NewLSB.getBitWidth()) \| NewLSB.zext(NewWidth)
352	/// In the slow case, we know the result is large.
353	APInt APInt::concatSlowCase(const APInt &NewLSB) const {
354	unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
355	APInt Result = NewLSB.zext(width: NewWidth);
356	Result.insertBits(SubBits: *this, bitPosition: NewLSB.getBitWidth());
357	return Result;
358	}
359
360	/// Toggle a given bit to its opposite value whose position is given
361	/// as "bitPosition".
362	/// Toggles a given bit to its opposite value.
363	void APInt::flipBit(unsigned bitPosition) {
364	assert(bitPosition < BitWidth && "Out of the bit-width range!");
365	setBitVal(BitPosition: bitPosition, BitValue: !(*this)[bitPosition]);
366	}
367
368	void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
369	unsigned subBitWidth = subBits.getBitWidth();
370	assert((subBitWidth + bitPosition) <= BitWidth && "Illegal bit insertion");
371
372	// inserting no bits is a noop.
373	if (subBitWidth == `0`)
374	return;
375
376	// Insertion is a direct copy.
377	if (subBitWidth == BitWidth) {
378	*this = subBits;
379	return;
380	}
381
382	// Single word result can be done as a direct bitmask.
383	if (isSingleWord()) {
384	uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
385	U.VAL &= ~(mask << bitPosition);
386	U.VAL \|= (subBits.U.VAL << bitPosition);
387	return;
388	}
389
390	unsigned loBit = whichBit(bitPosition);
391	unsigned loWord = whichWord(bitPosition);
392	unsigned hi1Word = whichWord(bitPosition: bitPosition + subBitWidth - `1`);
393
394	// Insertion within a single word can be done as a direct bitmask.
395	if (loWord == hi1Word) {
396	uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
397	U.pVal[loWord] &= ~(mask << loBit);
398	U.pVal[loWord] \|= (subBits.U.VAL << loBit);
399	return;
400	}
401
402	// Insert on word boundaries.
403	if (loBit == `0`) {
404	// Direct copy whole words.
405	unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
406	memcpy(dest: U.pVal + loWord, src: subBits.getRawData(),
407	n: numWholeSubWords * APINT_WORD_SIZE);
408
409	// Mask+insert remaining bits.
410	unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
411	if (remainingBits != `0`) {
412	uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
413	U.pVal[hi1Word] &= ~mask;
414	U.pVal[hi1Word] \|= subBits.getWord(bitPosition: subBitWidth - `1`);
415	}
416	return;
417	}
418
419	// General case - set/clear individual bits in dst based on src.
420	// TODO - there is scope for optimization here, but at the moment this code
421	// path is barely used so prefer readability over performance.
422	for (unsigned i = `0`; i != subBitWidth; ++i)
423	setBitVal(BitPosition: bitPosition + i, BitValue: subBits [i]);
424	}
425
426	void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
427	uint64_t maskBits = maskTrailingOnes<uint64_t>(N: numBits);
428	subBits &= maskBits;
429	if (isSingleWord()) {
430	U.VAL &= ~(maskBits << bitPosition);
431	U.VAL \|= subBits << bitPosition;
432	return;
433	}
434
435	unsigned loBit = whichBit(bitPosition);
436	unsigned loWord = whichWord(bitPosition);
437	unsigned hiWord = whichWord(bitPosition: bitPosition + numBits - `1`);
438	if (loWord == hiWord) {
439	U.pVal[loWord] &= ~(maskBits << loBit);
440	U.pVal[loWord] \|= subBits << loBit;
441	return;
442	}
443
444	static_assert(`8` * sizeof(WordType) <= `64`, "This code assumes only two words affected");
445	unsigned wordBits = `8` * sizeof(WordType);
446	U.pVal[loWord] &= ~(maskBits << loBit);
447	U.pVal[loWord] \|= subBits << loBit;
448
449	U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
450	U.pVal[hiWord] \|= subBits >> (wordBits - loBit);
451	}
452
453	APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
454	assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
455	"Illegal bit extraction");
456
457	if (isSingleWord())
458	return APInt (numBits, U.VAL >> bitPosition);
459
460	unsigned loBit = whichBit(bitPosition);
461	unsigned loWord = whichWord(bitPosition);
462	unsigned hiWord = whichWord(bitPosition: bitPosition + numBits - `1`);
463
464	// Single word result extracting bits from a single word source.
465	if (loWord == hiWord)
466	return APInt (numBits, U.pVal[loWord] >> loBit);
467
468	// Extracting bits that start on a source word boundary can be done
469	// as a fast memory copy.
470	if (loBit == `0`)
471	return APInt (numBits, ArrayRef(U.pVal + loWord, `1` + hiWord - loWord));
472
473	// General case - shift + copy source words directly into place.
474	APInt Result(numBits, `0`);
475	unsigned NumSrcWords = getNumWords();
476	unsigned NumDstWords = Result.getNumWords();
477
478	uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
479	for (unsigned word = `0`; word < NumDstWords; ++word) {
480	uint64_t w0 = U.pVal[loWord + word];
481	uint64_t w1 =
482	(loWord + word + `1`) < NumSrcWords ? U.pVal[loWord + word + `1`] : `0`;
483	DestPtr[word] = (w0 >> loBit) \| (w1 << (APINT_BITS_PER_WORD - loBit));
484	}
485
486	return Result.clearUnusedBits();
487	}
488
489	uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
490	unsigned bitPosition) const {
491	assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
492	"Illegal bit extraction");
493	assert(numBits <= `64` && "Illegal bit extraction");
494
495	uint64_t maskBits = maskTrailingOnes<uint64_t>(N: numBits);
496	if (isSingleWord())
497	return (U.VAL >> bitPosition) & maskBits;
498
499	unsigned loBit = whichBit(bitPosition);
500	unsigned loWord = whichWord(bitPosition);
501	unsigned hiWord = whichWord(bitPosition: bitPosition + numBits - `1`);
502	if (loWord == hiWord)
503	return (U.pVal[loWord] >> loBit) & maskBits;
504
505	static_assert(`8` * sizeof(WordType) <= `64`, "This code assumes only two words affected");
506	unsigned wordBits = `8` * sizeof(WordType);
507	uint64_t retBits = U.pVal[loWord] >> loBit;
508	retBits \|= U.pVal[hiWord] << (wordBits - loBit);
509	retBits &= maskBits;
510	return retBits;
511	}
512
513	unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) {
514	assert(!Str.empty() && "Invalid string length");
515	size_t StrLen = Str.size();
516
517	// Each computation below needs to know if it's negative.
518	unsigned IsNegative = false;
519	if (Str [`0`] == `'-'` \|\| Str [`0`] == `'+'`) {
520	IsNegative = Str [`0`] == `'-'`;
521	StrLen--;
522	assert(StrLen && "String is only a sign, needs a value.");
523	}
524
525	// For radixes of power-of-two values, the bits required is accurately and
526	// easily computed.
527	if (Radix == `2`)
528	return StrLen + IsNegative;
529	if (Radix == `8`)
530	return StrLen * `3` + IsNegative;
531	if (Radix == `16`)
532	return StrLen * `4` + IsNegative;
533
534	// Compute a sufficient number of bits that is always large enough but might
535	// be too large. This avoids the assertion in the constructor. This
536	// calculation doesn't work appropriately for the numbers 0-9, so just use 4
537	// bits in that case.
538	if (Radix == `10`)
539	return (StrLen == `1` ? `4` : StrLen * `64` / `18`) + IsNegative;
540
541	assert(Radix == `36`);
542	return (StrLen == `1` ? `7` : StrLen * `16` / `3`) + IsNegative;
543	}
544
545	unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
546	// Compute a sufficient number of bits that is always large enough but might
547	// be too large.
548	unsigned sufficient = getSufficientBitsNeeded(Str: str, Radix: radix);
549
550	// For bases 2, 8, and 16, the sufficient number of bits is exact and we can
551	// return the value directly. For bases 10 and 36, we need to do extra work.
552	if (radix == `2` \|\| radix == `8` \|\| radix == `16`)
553	return sufficient;
554
555	// This is grossly inefficient but accurate. We could probably do something
556	// with a computation of roughly slen64/20 and then adjust by the value of*
557	// the first few digits. But, I'm not sure how accurate that could be.
558	size_t slen = str.size();
559
560	// Each computation below needs to know if it's negative.
561	StringRef::iterator p = str.begin();
562	unsigned isNegative = *p == `'-'`;
563	if (p == `'-'` \|\| p == `'+'`) {
564	p++;
565	slen--;
566	assert(slen && "String is only a sign, needs a value.");
567	}
568
569
570	// Convert to the actual binary value.
571	APInt tmp(sufficient, StringRef (p, slen), radix);
572
573	// Compute how many bits are required. If the log is infinite, assume we need
574	// just bit. If the log is exact and value is negative, then the value is
575	// MinSignedValue with (log + 1) bits.
576	unsigned log = tmp.logBase2();
577	if (log == (unsigned)-`1`) {
578	return isNegative + `1`;
579	} else if (isNegative && tmp.isPowerOf2()) {
580	return isNegative + log;
581	} else {
582	return isNegative + log + `1`;
583	}
584	}
585
586	hash_code llvm::hash_value(const APInt &Arg) {
587	if (Arg.isSingleWord())
588	return hash_combine(args: Arg.BitWidth, args: Arg.U.VAL);
589
590	return hash_combine(
591	args: Arg.BitWidth,
592	args: hash_combine_range(first: Arg.U.pVal, last: Arg.U.pVal + Arg.getNumWords()));
593	}
594
595	unsigned DenseMapInfo<APInt, void>::getHashValue(const APInt &Key) {
596	return static_cast<unsigned>(hash_value(Arg: Key));
597	}
598
599	bool APInt::isSplat(unsigned SplatSizeInBits) const {
600	assert(getBitWidth() % SplatSizeInBits == `0` &&
601	"SplatSizeInBits must divide width!");
602	// We can check that all parts of an integer are equal by making use of a
603	// little trick: rotate and check if it's still the same value.
604	return *this == rotl(rotateAmt: SplatSizeInBits);
605	}
606
607	/// This function returns the high "numBits" bits of this APInt.
608	APInt APInt::getHiBits(unsigned numBits) const {
609	return this->lshr(shiftAmt: BitWidth - numBits);
610	}
611
612	/// This function returns the low "numBits" bits of this APInt.
613	APInt APInt::getLoBits(unsigned numBits) const {
614	APInt Result(getLowBitsSet(numBits: BitWidth, loBitsSet: numBits));
615	Result &= *this;
616	return Result;
617	}
618
619	/// Return a value containing V broadcasted over NewLen bits.
620	APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
621	assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
622
623	APInt Val = V.zext(width: NewLen);
624	for (unsigned I = V.getBitWidth(); I < NewLen; I <<= `1`)
625	Val \|= Val << I;
626
627	return Val;
628	}
629
630	unsigned APInt::countLeadingZerosSlowCase() const {
631	unsigned Count = `0`;
632	for (int i = getNumWords()-`1`; i >= `0`; --i) {
633	uint64_t V = U.pVal[i];
634	if (V == `0`)
635	Count += APINT_BITS_PER_WORD;
636	else {
637	Count += llvm::countl_zero(Val: V);
638	break;
639	}
640	}
641	// Adjust for unused bits in the most significant word (they are zero).
642	unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
643	Count -= Mod > `0` ? APINT_BITS_PER_WORD - Mod : `0`;
644	return Count;
645	}
646
647	unsigned APInt::countLeadingOnesSlowCase() const {
648	unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
649	unsigned shift;
650	if (!highWordBits) {
651	highWordBits = APINT_BITS_PER_WORD;
652	shift = `0`;
653	} else {
654	shift = APINT_BITS_PER_WORD - highWordBits;
655	}
656	int i = getNumWords() - `1`;
657	unsigned Count = llvm::countl_one(Value: U.pVal[i] << shift);
658	if (Count == highWordBits) {
659	for (i--; i >= `0`; --i) {
660	if (U.pVal[i] == WORDTYPE_MAX)
661	Count += APINT_BITS_PER_WORD;
662	else {
663	Count += llvm::countl_one(Value: U.pVal[i]);
664	break;
665	}
666	}
667	}
668	return Count;
669	}
670
671	unsigned APInt::countTrailingZerosSlowCase() const {
672	unsigned Count = `0`;
673	unsigned i = `0`;
674	for (; i < getNumWords() && U.pVal[i] == `0`; ++i)
675	Count += APINT_BITS_PER_WORD;
676	if (i < getNumWords())
677	Count += llvm::countr_zero(Val: U.pVal[i]);
678	return std::min(a: Count, b: BitWidth);
679	}
680
681	unsigned APInt::countTrailingOnesSlowCase() const {
682	unsigned Count = `0`;
683	unsigned i = `0`;
684	for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
685	Count += APINT_BITS_PER_WORD;
686	if (i < getNumWords())
687	Count += llvm::countr_one(Value: U.pVal[i]);
688	assert(Count <= BitWidth);
689	return Count;
690	}
691
692	unsigned APInt::countPopulationSlowCase() const {
693	unsigned Count = `0`;
694	for (unsigned i = `0`; i < getNumWords(); ++i)
695	Count += llvm::popcount(Value: U.pVal[i]);
696	return Count;
697	}
698
699	bool APInt::intersectsSlowCase(const APInt &RHS) const {
700	for (unsigned i = `0`, e = getNumWords(); i != e; ++i)
701	if ((U.pVal[i] & RHS.U.pVal[i]) != `0`)
702	return true;
703
704	return false;
705	}
706
707	bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
708	for (unsigned i = `0`, e = getNumWords(); i != e; ++i)
709	if ((U.pVal[i] & ~RHS.U.pVal[i]) != `0`)
710	return false;
711
712	return true;
713	}
714
715	APInt APInt::byteSwap() const {
716	assert(BitWidth >= `16` && BitWidth % `8` == `0` && "Cannot byteswap!");
717	if (BitWidth == `16`)
718	return APInt (BitWidth, llvm::byteswap<uint16_t>(V: U.VAL));
719	if (BitWidth == `32`)
720	return APInt (BitWidth, llvm::byteswap<uint32_t>(V: U.VAL));
721	if (BitWidth <= `64`) {
722	uint64_t Tmp1 = llvm::byteswap<uint64_t>(V: U.VAL);
723	Tmp1 >>= (`64` - BitWidth);
724	return APInt (BitWidth, Tmp1);
725	}
726
727	APInt Result(getNumWords() * APINT_BITS_PER_WORD, `0`);
728	for (unsigned I = `0`, N = getNumWords(); I != N; ++I)
729	Result.U.pVal[I] = llvm::byteswap<uint64_t>(V: U.pVal[N - I - `1`]);
730	if (Result.BitWidth != BitWidth) {
731	Result.lshrInPlace(ShiftAmt: Result.BitWidth - BitWidth);
732	Result.BitWidth = BitWidth;
733	}
734	return Result;
735	}
736
737	APInt APInt::reverseBits() const {
738	switch (BitWidth) {
739	case `64`:
740	return APInt (BitWidth, llvm::reverseBits<uint64_t>(Val: U.VAL));
741	case `32`:
742	return APInt (BitWidth, llvm::reverseBits<uint32_t>(Val: U.VAL));
743	case `16`:
744	return APInt (BitWidth, llvm::reverseBits<uint16_t>(Val: U.VAL));
745	case `8`:
746	return APInt (BitWidth, llvm::reverseBits<uint8_t>(Val: U.VAL));
747	case `0`:
748	return *this;
749	default:
750	break;
751	}
752
753	APInt Val(*this);
754	APInt Reversed(BitWidth, `0`);
755	unsigned S = BitWidth;
756
757	for (; Val != `0`; Val.lshrInPlace(ShiftAmt: `1`)) {
758	Reversed <<= `1`;
759	Reversed \|= Val [`0`];
760	--S;
761	}
762
763	Reversed <<= S;
764	return Reversed;
765	}
766
767	APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
768	// Fast-path a common case.
769	if (A == B) return A;
770
771	// Corner cases: if either operand is zero, the other is the gcd.
772	if (!A) return B;
773	if (!B) return A;
774
775	// Count common powers of 2 and remove all other powers of 2.
776	unsigned Pow2;
777	{
778	unsigned Pow2_A = A.countr_zero();
779	unsigned Pow2_B = B.countr_zero();
780	if (Pow2_A > Pow2_B) {
781	A.lshrInPlace(ShiftAmt: Pow2_A - Pow2_B);
782	Pow2 = Pow2_B;
783	} else if (Pow2_B > Pow2_A) {
784	B.lshrInPlace(ShiftAmt: Pow2_B - Pow2_A);
785	Pow2 = Pow2_A;
786	} else {
787	Pow2 = Pow2_A;
788	}
789	}
790
791	// Both operands are odd multiples of 2^Pow_2:
792	//
793	// gcd(a, b) = gcd(\|a - b\| / 2^i, min(a, b))
794	//
795	// This is a modified version of Stein's algorithm, taking advantage of
796	// efficient countTrailingZeros().
797	while (A != B) {
798	if (A.ugt(RHS: B)) {
799	A -= B;
800	A.lshrInPlace(ShiftAmt: A.countr_zero() - Pow2);
801	} else {
802	B -= A;
803	B.lshrInPlace(ShiftAmt: B.countr_zero() - Pow2);
804	}
805	}
806
807	return A;
808	}
809
810	APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
811	uint64_t I = bit_cast<uint64_t>(from: Double);
812
813	// Get the sign bit from the highest order bit
814	bool isNeg = I >> `63`;
815
816	// Get the 11-bit exponent and adjust for the 1023 bit bias
817	int64_t exp = ((I >> `52`) & `0x7ff`) - `1023`;
818
819	// If the exponent is negative, the value is < 0 so just return 0.
820	if (exp < `0`)
821	return APInt (width, `0u`);
822
823	// Extract the mantissa by clearing the top 12 bits (sign + exponent).
824	uint64_t mantissa = (I & (~`0ULL` >> `12`)) \| `1ULL` << `52`;
825
826	// If the exponent doesn't shift all bits out of the mantissa
827	if (exp < `52`)
828	return isNeg ? -APInt (width, mantissa >> (`52` - exp)) :
829	APInt (width, mantissa >> (`52` - exp));
830
831	// If the client didn't provide enough bits for us to shift the mantissa into
832	// then the result is undefined, just return 0
833	if (width <= exp - `52`)
834	return APInt (width, `0`);
835
836	// Otherwise, we have to shift the mantissa bits up to the right location
837	APInt Tmp(width, mantissa);
838	Tmp <<= (unsigned)exp - `52`;
839	return isNeg ? -Tmp : Tmp;
840	}
841
842	/// This function converts this APInt to a double.
843	/// The layout for double is as following (IEEE Standard 754):
844	/// --------------------------------------
845	/// \| Sign Exponent Fraction Bias \|
846	/// \|-------------------------------------- \|
847	/// \| 1[63] 11[62-52] 52[51-00] 1023 \|
848	/// --------------------------------------
849	double APInt::roundToDouble(bool isSigned) const {
850
851	// Handle the simple case where the value is contained in one uint64_t.
852	// It is wrong to optimize getWord(0) to VAL; there might be more than one word.
853	if (isSingleWord() \|\| getActiveBits() <= APINT_BITS_PER_WORD) {
854	if (isSigned) {
855	int64_t sext = SignExtend64(X: getWord(bitPosition: `0`), B: BitWidth);
856	return double(sext);
857	} else
858	return double(getWord(bitPosition: `0`));
859	}
860
861	// Determine if the value is negative.
862	bool isNeg = isSigned ? (*this)[BitWidth-`1`] : false;
863
864	// Construct the absolute value if we're negative.
865	APInt Tmp(isNeg ? -(*this) : (*this));
866
867	// Figure out how many bits we're using.
868	unsigned n = Tmp.getActiveBits();
869
870	// The exponent (without bias normalization) is just the number of bits
871	// we are using. Note that the sign bit is gone since we constructed the
872	// absolute value.
873	uint64_t exp = n;
874
875	// Return infinity for exponent overflow
876	if (exp > `1023`) {
877	if (!isSigned \|\| !isNeg)
878	return std::numeric_limits<double>::infinity();
879	else
880	return -std::numeric_limits<double>::infinity();
881	}
882	exp += `1023`; // Increment for 1023 bias
883
884	// Number of bits in mantissa is 52. To obtain the mantissa value, we must
885	// extract the high 52 bits from the correct words in pVal.
886	uint64_t mantissa;
887	unsigned hiWord = whichWord(bitPosition: n-`1`);
888	if (hiWord == `0`) {
889	mantissa = Tmp.U.pVal[`0`];
890	if (n > `52`)
891	mantissa >>= n - `52`; // shift down, we want the top 52 bits.
892	} else {
893	assert(hiWord > `0` && "huh?");
894	uint64_t hibits = Tmp.U.pVal[hiWord] << (`52` - n % APINT_BITS_PER_WORD);
895	uint64_t lobits = Tmp.U.pVal[hiWord-`1`] >> (`11` + n % APINT_BITS_PER_WORD);
896	mantissa = hibits \| lobits;
897	}
898
899	// The leading bit of mantissa is implicit, so get rid of it.
900	uint64_t sign = isNeg ? (`1ULL` << (APINT_BITS_PER_WORD - `1`)) : `0`;
901	uint64_t I = sign \| (exp << `52`) \| mantissa;
902	return bit_cast<double>(from: I);
903	}
904
905	// Truncate to new width.
906	APInt APInt::trunc(unsigned width) const {
907	assert(width <= BitWidth && "Invalid APInt Truncate request");
908
909	if (width <= APINT_BITS_PER_WORD)
910	return APInt (width, getRawData()[`0`]);
911
912	if (width == BitWidth)
913	return *this;
914
915	APInt Result(getMemory(numWords: getNumWords(BitWidth: width)), width);
916
917	// Copy full words.
918	unsigned i;
919	for (i = `0`; i != width / APINT_BITS_PER_WORD; i++)
920	Result.U.pVal[i] = U.pVal[i];
921
922	// Truncate and copy any partial word.
923	unsigned bits = (`0` - width) % APINT_BITS_PER_WORD;
924	if (bits != `0`)
925	Result.U.pVal[i] = U.pVal[i] << bits >> bits;
926
927	return Result;
928	}
929
930	// Truncate to new width with unsigned saturation.
931	APInt APInt::truncUSat(unsigned width) const {
932	assert(width <= BitWidth && "Invalid APInt Truncate request");
933
934	// Can we just losslessly truncate it?
935	if (isIntN(N: width))
936	return trunc(width);
937	// If not, then just return the new limit.
938	return APInt::getMaxValue(numBits: width);
939	}
940
941	// Truncate to new width with signed saturation.
942	APInt APInt::truncSSat(unsigned width) const {
943	assert(width <= BitWidth && "Invalid APInt Truncate request");
944
945	// Can we just losslessly truncate it?
946	if (isSignedIntN(N: width))
947	return trunc(width);
948	// If not, then just return the new limits.
949	return isNegative() ? APInt::getSignedMinValue(numBits: width)
950	: APInt::getSignedMaxValue(numBits: width);
951	}
952
953	// Sign extend to a new width.
954	APInt APInt::sext(unsigned Width) const {
955	assert(Width >= BitWidth && "Invalid APInt SignExtend request");
956
957	if (Width <= APINT_BITS_PER_WORD)
958	return APInt (Width, SignExtend64(X: U.VAL, B: BitWidth));
959
960	if (Width == BitWidth)
961	return *this;
962
963	APInt Result(getMemory(numWords: getNumWords(BitWidth: Width)), Width);
964
965	// Copy words.
966	std::memcpy(dest: Result.U.pVal, src: getRawData(), n: getNumWords() * APINT_WORD_SIZE);
967
968	// Sign extend the last word since there may be unused bits in the input.
969	Result.U.pVal[getNumWords() - `1`] =
970	SignExtend64(X: Result.U.pVal[getNumWords() - `1`],
971	B: ((BitWidth - `1`) % APINT_BITS_PER_WORD) + `1`);
972
973	// Fill with sign bits.
974	std::memset(s: Result.U.pVal + getNumWords(), c: isNegative() ? -`1` : `0`,
975	n: (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
976	Result.clearUnusedBits();
977	return Result;
978	}
979
980	// Zero extend to a new width.
981	APInt APInt::zext(unsigned width) const {
982	assert(width >= BitWidth && "Invalid APInt ZeroExtend request");
983
984	if (width <= APINT_BITS_PER_WORD)
985	return APInt (width, U.VAL);
986
987	if (width == BitWidth)
988	return *this;
989
990	APInt Result(getMemory(numWords: getNumWords(BitWidth: width)), width);
991
992	// Copy words.
993	std::memcpy(dest: Result.U.pVal, src: getRawData(), n: getNumWords() * APINT_WORD_SIZE);
994
995	// Zero remaining words.
996	std::memset(s: Result.U.pVal + getNumWords(), c: `0`,
997	n: (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
998
999	return Result;
1000	}
1001
1002	APInt APInt::zextOrTrunc(unsigned width) const {
1003	if (BitWidth < width)
1004	return zext(width);
1005	if (BitWidth > width)
1006	return trunc(width);
1007	return *this;
1008	}
1009
1010	APInt APInt::sextOrTrunc(unsigned width) const {
1011	if (BitWidth < width)
1012	return sext(Width: width);
1013	if (BitWidth > width)
1014	return trunc(width);
1015	return *this;
1016	}
1017
1018	/// Arithmetic right-shift this APInt by shiftAmt.
1019	/// Arithmetic right-shift function.
1020	void APInt::ashrInPlace(const APInt &shiftAmt) {
1021	ashrInPlace(ShiftAmt: (unsigned)shiftAmt.getLimitedValue(Limit: BitWidth));
1022	}
1023
1024	/// Arithmetic right-shift this APInt by shiftAmt.
1025	/// Arithmetic right-shift function.
1026	void APInt::ashrSlowCase(unsigned ShiftAmt) {
1027	// Don't bother performing a no-op shift.
1028	if (!ShiftAmt)
1029	return;
1030
1031	// Save the original sign bit for later.
1032	bool Negative = isNegative();
1033
1034	// WordShift is the inter-part shift; BitShift is intra-part shift.
1035	unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1036	unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1037
1038	unsigned WordsToMove = getNumWords() - WordShift;
1039	if (WordsToMove != `0`) {
1040	// Sign extend the last word to fill in the unused bits.
1041	U.pVal[getNumWords() - `1`] = SignExtend64(
1042	X: U.pVal[getNumWords() - `1`], B: ((BitWidth - `1`) % APINT_BITS_PER_WORD) + `1`);
1043
1044	// Fastpath for moving by whole words.
1045	if (BitShift == `0`) {
1046	std::memmove(dest: U.pVal, src: U.pVal + WordShift, n: WordsToMove * APINT_WORD_SIZE);
1047	} else {
1048	// Move the words containing significant bits.
1049	for (unsigned i = `0`; i != WordsToMove - `1`; ++i)
1050	U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) \|
1051	(U.pVal[i + WordShift + `1`] << (APINT_BITS_PER_WORD - BitShift));
1052
1053	// Handle the last word which has no high bits to copy.
1054	U.pVal[WordsToMove - `1`] = U.pVal[WordShift + WordsToMove - `1`] >> BitShift;
1055	// Sign extend one more time.
1056	U.pVal[WordsToMove - `1`] =
1057	SignExtend64(X: U.pVal[WordsToMove - `1`], B: APINT_BITS_PER_WORD - BitShift);
1058	}
1059	}
1060
1061	// Fill in the remainder based on the original sign.
1062	std::memset(s: U.pVal + WordsToMove, c: Negative ? -`1` : `0`,
1063	n: WordShift * APINT_WORD_SIZE);
1064	clearUnusedBits();
1065	}
1066
1067	/// Logical right-shift this APInt by shiftAmt.
1068	/// Logical right-shift function.
1069	void APInt::lshrInPlace(const APInt &shiftAmt) {
1070	lshrInPlace(ShiftAmt: (unsigned)shiftAmt.getLimitedValue(Limit: BitWidth));
1071	}
1072
1073	/// Logical right-shift this APInt by shiftAmt.
1074	/// Logical right-shift function.
1075	void APInt::lshrSlowCase(unsigned ShiftAmt) {
1076	tcShiftRight(U.pVal, Words: getNumWords(), Count: ShiftAmt);
1077	}
1078
1079	/// Left-shift this APInt by shiftAmt.
1080	/// Left-shift function.
1081	APInt &APInt::operator<<=(const APInt &shiftAmt) {
1082	// It's undefined behavior in C to shift by BitWidth or greater.
1083	*this <<= (unsigned)shiftAmt.getLimitedValue(Limit: BitWidth);
1084	return *this;
1085	}
1086
1087	void APInt::shlSlowCase(unsigned ShiftAmt) {
1088	tcShiftLeft(U.pVal, Words: getNumWords(), Count: ShiftAmt);
1089	clearUnusedBits();
1090	}
1091
1092	// Calculate the rotate amount modulo the bit width.
1093	static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1094	if (LLVM_UNLIKELY(BitWidth == `0`))
1095	return `0`;
1096	unsigned rotBitWidth = rotateAmt.getBitWidth();
1097	APInt rot = rotateAmt;
1098	if (rotBitWidth < BitWidth) {
1099	// Extend the rotate APInt, so that the urem doesn't divide by 0.
1100	// e.g. APInt(1, 32) would give APInt(1, 0).
1101	rot = rotateAmt.zext(width: BitWidth);
1102	}
1103	rot = rot.urem(RHS: APInt (rot.getBitWidth(), BitWidth));
1104	return rot.getLimitedValue(Limit: BitWidth);
1105	}
1106
1107	APInt APInt::rotl(const APInt &rotateAmt) const {
1108	return rotl(rotateAmt: rotateModulo(BitWidth, rotateAmt));
1109	}
1110
1111	APInt APInt::rotl(unsigned rotateAmt) const {
1112	if (LLVM_UNLIKELY(BitWidth == `0`))
1113	return *this;
1114	rotateAmt %= BitWidth;
1115	if (rotateAmt == `0`)
1116	return *this;
1117	return shl(shiftAmt: rotateAmt) \| lshr(shiftAmt: BitWidth - rotateAmt);
1118	}
1119
1120	APInt APInt::rotr(const APInt &rotateAmt) const {
1121	return rotr(rotateAmt: rotateModulo(BitWidth, rotateAmt));
1122	}
1123
1124	APInt APInt::rotr(unsigned rotateAmt) const {
1125	if (BitWidth == `0`)
1126	return *this;
1127	rotateAmt %= BitWidth;
1128	if (rotateAmt == `0`)
1129	return *this;
1130	return lshr(shiftAmt: rotateAmt) \| shl(shiftAmt: BitWidth - rotateAmt);
1131	}
1132
1133	/// \returns the nearest log base 2 of this APInt. Ties round up.
1134	///
1135	/// NOTE: When we have a BitWidth of 1, we define:
1136	///
1137	/// log2(0) = UINT32_MAX
1138	/// log2(1) = 0
1139	///
1140	/// to get around any mathematical concerns resulting from
1141	/// referencing 2 in a space where 2 does no exist.
1142	unsigned APInt::nearestLogBase2() const {
1143	// Special case when we have a bitwidth of 1. If VAL is 1, then we
1144	// get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1145	// UINT32_MAX.
1146	if (BitWidth == `1`)
1147	return U.VAL - `1`;
1148
1149	// Handle the zero case.
1150	if (isZero())
1151	return UINT32_MAX;
1152
1153	// The non-zero case is handled by computing:
1154	//
1155	// nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1156	//
1157	// where x[i] is referring to the value of the ith bit of x.
1158	unsigned lg = logBase2();
1159	return lg + unsigned((*this)[lg - `1`]);
1160	}
1161
1162	// Square Root - this method computes and returns the square root of "this".
1163	// Three mechanisms are used for computation. For small values (<= 5 bits),
1164	// a table lookup is done. This gets some performance for common cases. For
1165	// values using less than 52 bits, the value is converted to double and then
1166	// the libc sqrt function is called. The result is rounded and then converted
1167	// back to a uint64_t which is then used to construct the result. Finally,
1168	// the Babylonian method for computing square roots is used.
1169	APInt APInt::sqrt() const {
1170
1171	// Determine the magnitude of the value.
1172	unsigned magnitude = getActiveBits();
1173
1174	// Use a fast table for some small values. This also gets rid of some
1175	// rounding errors in libc sqrt for small values.
1176	if (magnitude <= `5`) {
1177	static const uint8_t results[`32`] = {
1178	/ 0 / `0`,
1179	/ 1- 2 / `1`, `1`,
1180	/ 3- 6 / `2`, `2`, `2`, `2`,
1181	/ 7-12 / `3`, `3`, `3`, `3`, `3`, `3`,
1182	/ 13-20 / `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
1183	/ 21-30 / `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`, `5`,
1184	/ 31 / `6`
1185	};
1186	return APInt (BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[`0`]) ]);
1187	}
1188
1189	// If the magnitude of the value fits in less than 52 bits (the precision of
1190	// an IEEE double precision floating point value), then we can use the
1191	// libc sqrt function which will probably use a hardware sqrt computation.
1192	// This should be faster than the algorithm below.
1193	if (magnitude < `52`) {
1194	return APInt (BitWidth,
1195	uint64_t(::round(x: ::sqrt(x: double(isSingleWord() ? U.VAL
1196	: U.pVal[`0`])))));
1197	}
1198
1199	// Okay, all the short cuts are exhausted. We must compute it. The following
1200	// is a classical Babylonian method for computing the square root. This code
1201	// was adapted to APInt from a wikipedia article on such computations.
1202	// See http://www.wikipedia.org/ and go to the page named
1203	// Calculate_an_integer_square_root.
1204	unsigned nbits = BitWidth, i = `4`;
1205	APInt testy(BitWidth, `16`);
1206	APInt x_old(BitWidth, `1`);
1207	APInt x_new(BitWidth, `0`);
1208	APInt two(BitWidth, `2`);
1209
1210	// Select a good starting value using binary logarithms.
1211	for (;; i += `2`, testy = testy.shl(shiftAmt: `2`))
1212	if (i >= nbits \|\| this->ule(RHS: testy)) {
1213	x_old = x_old.shl(shiftAmt: i / `2`);
1214	break;
1215	}
1216
1217	// Use the Babylonian method to arrive at the integer square root:
1218	for (;;) {
1219	x_new = (this->udiv(RHS: x_old) + x_old).udiv(RHS: two);
1220	if (x_old.ule(RHS: x_new))
1221	break;
1222	x_old = x_new;
1223	}
1224
1225	// Make sure we return the closest approximation
1226	// NOTE: The rounding calculation below is correct. It will produce an
1227	// off-by-one discrepancy with results from pari/gp. That discrepancy has been
1228	// determined to be a rounding issue with pari/gp as it begins to use a
1229	// floating point representation after 192 bits. There are no discrepancies
1230	// between this algorithm and pari/gp for bit widths < 192 bits.
1231	APInt square(x_old * x_old);
1232	APInt nextSquare((x_old + `1`) * (x_old +`1`));
1233	if (this->ult(RHS: square))
1234	return x_old;
1235	assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1236	APInt midpoint((nextSquare - square).udiv(RHS: two));
1237	APInt offset(*this - square);
1238	if (offset.ult(RHS: midpoint))
1239	return x_old;
1240	return x_old + `1`;
1241	}
1242
1243	/// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth.
1244	APInt APInt::multiplicativeInverse() const {
1245	assert((*this)[`0`] &&
1246	"multiplicative inverse is only defined for odd numbers!");
1247
1248	// Use Newton's method.
1249	APInt Factor = *this;
1250	APInt T;
1251	while (!(T = *this * Factor).isOne())
1252	Factor *= `2` - std::move(T);
1253	return Factor;
1254	}
1255
1256	/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1257	/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1258	/// variables here have the same names as in the algorithm. Comments explain
1259	/// the algorithm and any deviation from it.
1260	static void KnuthDiv(uint32_t u, uint32_t v, uint32_t q, uint32_t r,
1261	unsigned m, unsigned n) {
1262	assert(u && "Must provide dividend");
1263	assert(v && "Must provide divisor");
1264	assert(q && "Must provide quotient");
1265	assert(u != v && u != q && v != q && "Must use different memory");
1266	assert(n>`1` && "n must be > 1");
1267
1268	// b denotes the base of the number system. In our case b is 2^32.
1269	const uint64_t b = uint64_t(`1`) << `32`;
1270
1271	// The DEBUG macros here tend to be spam in the debug output if you're not
1272	// debugging this code. Disable them unless KNUTH_DEBUG is defined.
1273	#ifdef KNUTH_DEBUG
1274	#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1275	#else
1276	#define DEBUG_KNUTH(X) do {} while(false)
1277	#endif
1278
1279	DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << `'\n'`);
1280	DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1281	DEBUG_KNUTH(for (int i = m + n; i >= `0`; i--) dbgs() << " " << u[i]);
1282	DEBUG_KNUTH(dbgs() << " by");
1283	DEBUG_KNUTH(for (int i = n; i > `0`; i--) dbgs() << " " << v[i - `1`]);
1284	DEBUG_KNUTH(dbgs() << `'\n'`);
1285	// D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1286	// u and v by d. Note that we have taken Knuth's advice here to use a power
1287	// of 2 value for d such that d v[n-1] >= b/2 (b is the base). A power of*
1288	// 2 allows us to shift instead of multiply and it is easy to determine the
1289	// shift amount from the leading zeros. We are basically normalizing the u
1290	// and v so that its high bits are shifted to the top of v's range without
1291	// overflow. Note that this can require an extra word in u so that u must
1292	// be of length m+n+1.
1293	unsigned shift = llvm::countl_zero(Val: v[n - `1`]);
1294	uint32_t v_carry = `0`;
1295	uint32_t u_carry = `0`;
1296	if (shift) {
1297	for (unsigned i = `0`; i < m+n; ++i) {
1298	uint32_t u_tmp = u[i] >> (`32` - shift);
1299	u[i] = (u[i] << shift) \| u_carry;
1300	u_carry = u_tmp;
1301	}
1302	for (unsigned i = `0`; i < n; ++i) {
1303	uint32_t v_tmp = v[i] >> (`32` - shift);
1304	v[i] = (v[i] << shift) \| v_carry;
1305	v_carry = v_tmp;
1306	}
1307	}
1308	u[m+n] = u_carry;
1309
1310	DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1311	DEBUG_KNUTH(for (int i = m + n; i >= `0`; i--) dbgs() << " " << u[i]);
1312	DEBUG_KNUTH(dbgs() << " by");
1313	DEBUG_KNUTH(for (int i = n; i > `0`; i--) dbgs() << " " << v[i - `1`]);
1314	DEBUG_KNUTH(dbgs() << `'\n'`);
1315
1316	// D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1317	int j = m;
1318	do {
1319	DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << `'\n'`);
1320	// D3. [Calculate q'.].
1321	// Set qp = (u[j+n]b + u[j+n-1]) / v[n-1]. (qp=qprime=q')*
1322	// Set rp = (u[j+n]b + u[j+n-1]) % v[n-1]. (rp=rprime=r')*
1323	// Now test if qp == b or qpv[n-2] > brp + u[j+n-2]; if so, decrease
1324	// qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1325	// on v[n-2] determines at high speed most of the cases in which the trial
1326	// value qp is one too large, and it eliminates all cases where qp is two
1327	// too large.
1328	uint64_t dividend = Make_64(High: u[j+n], Low: u[j+n-`1`]);
1329	DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << `'\n'`);
1330	uint64_t qp = dividend / v[n-`1`];
1331	uint64_t rp = dividend % v[n-`1`];
1332	if (qp == b \|\| qpv[n-`2`] > brp + u[j+n-`2`]) {
1333	qp--;
1334	rp += v[n-`1`];
1335	if (rp < b && (qp == b \|\| qpv[n-`2`] > brp + u[j+n-`2`]))
1336	qp--;
1337	}
1338	DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << `'\n'`);
1339
1340	// D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1341	// (u[j+n]u[j+n-1]..u[j]) - qp (v[n-1]...v[1]v[0]). This computation*
1342	// consists of a simple multiplication by a one-place number, combined with
1343	// a subtraction.
1344	// The digits (u[j+n]...u[j]) should be kept positive; if the result of
1345	// this step is actually negative, (u[j+n]...u[j]) should be left as the
1346	// true value plus b(n+1), namely as the b's complement of
1347	// the true value, and a "borrow" to the left should be remembered.
1348	int64_t borrow = `0`;
1349	for (unsigned i = `0`; i < n; ++i) {
1350	uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1351	int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(Value: p);
1352	u[j+i] = Lo_32(Value: subres);
1353	borrow = Hi_32(Value: p) - Hi_32(Value: subres);
1354	DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1355	<< ", borrow = " << borrow << `'\n'`);
1356	}
1357	bool isNeg = u[j+n] < borrow;
1358	u[j+n] -= Lo_32(Value: borrow);
1359
1360	DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1361	DEBUG_KNUTH(for (int i = m + n; i >= `0`; i--) dbgs() << " " << u[i]);
1362	DEBUG_KNUTH(dbgs() << `'\n'`);
1363
1364	// D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1365	// negative, go to step D6; otherwise go on to step D7.
1366	q[j] = Lo_32(Value: qp);
1367	if (isNeg) {
1368	// D6. [Add back]. The probability that this step is necessary is very
1369	// small, on the order of only 2/b. Make sure that test data accounts for
1370	// this possibility. Decrease q[j] by 1
1371	q[j]--;
1372	// and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1373	// A carry will occur to the left of u[j+n], and it should be ignored
1374	// since it cancels with the borrow that occurred in D4.
1375	bool carry = false;
1376	for (unsigned i = `0`; i < n; i++) {
1377	uint32_t limit = std::min(a: u[j+i],b: v[i]);
1378	u[j+i] += v[i] + carry;
1379	carry = u[j+i] < limit \|\| (carry && u[j+i] == limit);
1380	}
1381	u[j+n] += carry;
1382	}
1383	DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1384	DEBUG_KNUTH(for (int i = m + n; i >= `0`; i--) dbgs() << " " << u[i]);
1385	DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << `'\n'`);
1386
1387	// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1388	} while (--j >= `0`);
1389
1390	DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1391	DEBUG_KNUTH(for (int i = m; i >= `0`; i--) dbgs() << " " << q[i]);
1392	DEBUG_KNUTH(dbgs() << `'\n'`);
1393
1394	// D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1395	// remainder may be obtained by dividing u[...] by d. If r is non-null we
1396	// compute the remainder (urem uses this).
1397	if (r) {
1398	// The value d is expressed by the "shift" value above since we avoided
1399	// multiplication by d by using a shift left. So, all we have to do is
1400	// shift right here.
1401	if (shift) {
1402	uint32_t carry = `0`;
1403	DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1404	for (int i = n-`1`; i >= `0`; i--) {
1405	r[i] = (u[i] >> shift) \| carry;
1406	carry = u[i] << (`32` - shift);
1407	DEBUG_KNUTH(dbgs() << " " << r[i]);
1408	}
1409	} else {
1410	for (int i = n-`1`; i >= `0`; i--) {
1411	r[i] = u[i];
1412	DEBUG_KNUTH(dbgs() << " " << r[i]);
1413	}
1414	}
1415	DEBUG_KNUTH(dbgs() << `'\n'`);
1416	}
1417	DEBUG_KNUTH(dbgs() << `'\n'`);
1418	}
1419
1420	void APInt::divide(const WordType LHS, unsigned* lhsWords, const WordType *RHS,
1421	unsigned rhsWords, WordType Quotient, WordType Remainder) {
1422	assert(lhsWords >= rhsWords && "Fractional result");
1423
1424	// First, compose the values into an array of 32-bit words instead of
1425	// 64-bit words. This is a necessity of both the "short division" algorithm
1426	// and the Knuth "classical algorithm" which requires there to be native
1427	// operations for +, -, and on an m bit value with an m2 bit result. We
1428	// can't use 64-bit operands here because we don't have native results of
1429	// 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1430	// work on large-endian machines.
1431	unsigned n = rhsWords * `2`;
1432	unsigned m = (lhsWords * `2`) - n;
1433
1434	// Allocate space for the temporary values we need either on the stack, if
1435	// it will fit, or on the heap if it won't.
1436	uint32_t SPACE[`128`];
1437	uint32_t U = nullptr*;
1438	uint32_t V = nullptr*;
1439	uint32_t Q = nullptr*;
1440	uint32_t R = nullptr*;
1441	if ((Remainder?`4`:`3`)n+`2`m+`1` <= `128`) {
1442	U = &SPACE[`0`];
1443	V = &SPACE[m+n+`1`];
1444	Q = &SPACE[(m+n+`1`) + n];
1445	if (Remainder)
1446	R = &SPACE[(m+n+`1`) + n + (m+n)];
1447	} else {
1448	U = new uint32_t[m + n + `1`];
1449	V = new uint32_t[n];
1450	Q = new uint32_t[m+n];
1451	if (Remainder)
1452	R = new uint32_t[n];
1453	}
1454
1455	// Initialize the dividend
1456	memset(s: U, c: `0`, n: (m+n+`1`)*sizeof(uint32_t));
1457	for (unsigned i = `0`; i < lhsWords; ++i) {
1458	uint64_t tmp = LHS[i];
1459	U[i * `2`] = Lo_32(Value: tmp);
1460	U[i * `2` + `1`] = Hi_32(Value: tmp);
1461	}
1462	U[m+n] = `0`; // this extra word is for "spill" in the Knuth algorithm.
1463
1464	// Initialize the divisor
1465	memset(s: V, c: `0`, n: (n)*sizeof(uint32_t));
1466	for (unsigned i = `0`; i < rhsWords; ++i) {
1467	uint64_t tmp = RHS[i];
1468	V[i * `2`] = Lo_32(Value: tmp);
1469	V[i * `2` + `1`] = Hi_32(Value: tmp);
1470	}
1471
1472	// initialize the quotient and remainder
1473	memset(s: Q, c: `0`, n: (m+n) * sizeof(uint32_t));
1474	if (Remainder)
1475	memset(s: R, c: `0`, n: n * sizeof(uint32_t));
1476
1477	// Now, adjust m and n for the Knuth division. n is the number of words in
1478	// the divisor. m is the number of words by which the dividend exceeds the
1479	// divisor (i.e. m+n is the length of the dividend). These sizes must not
1480	// contain any zero words or the Knuth algorithm fails.
1481	for (unsigned i = n; i > `0` && V[i-`1`] == `0`; i--) {
1482	n--;
1483	m++;
1484	}
1485	for (unsigned i = m+n; i > `0` && U[i-`1`] == `0`; i--)
1486	m--;
1487
1488	// If we're left with only a single word for the divisor, Knuth doesn't work
1489	// so we implement the short division algorithm here. This is much simpler
1490	// and faster because we are certain that we can divide a 64-bit quantity
1491	// by a 32-bit quantity at hardware speed and short division is simply a
1492	// series of such operations. This is just like doing short division but we
1493	// are using base 2^32 instead of base 10.
1494	assert(n != `0` && "Divide by zero?");
1495	if (n == `1`) {
1496	uint32_t divisor = V[`0`];
1497	uint32_t remainder = `0`;
1498	for (int i = m; i >= `0`; i--) {
1499	uint64_t partial_dividend = Make_64(High: remainder, Low: U[i]);
1500	if (partial_dividend == `0`) {
1501	Q[i] = `0`;
1502	remainder = `0`;
1503	} else if (partial_dividend < divisor) {
1504	Q[i] = `0`;
1505	remainder = Lo_32(Value: partial_dividend);
1506	} else if (partial_dividend == divisor) {
1507	Q[i] = `1`;
1508	remainder = `0`;
1509	} else {
1510	Q[i] = Lo_32(Value: partial_dividend / divisor);
1511	remainder = Lo_32(Value: partial_dividend - (Q[i] * divisor));
1512	}
1513	}
1514	if (R)
1515	R[`0`] = remainder;
1516	} else {
1517	// Now we're ready to invoke the Knuth classical divide algorithm. In this
1518	// case n > 1.
1519	KnuthDiv(u: U, v: V, q: Q, r: R, m, n);
1520	}
1521
1522	// If the caller wants the quotient
1523	if (Quotient) {
1524	for (unsigned i = `0`; i < lhsWords; ++i)
1525	Quotient[i] = Make_64(High: Q[i`2`+`1`], Low: Q[i`2`]);
1526	}
1527
1528	// If the caller wants the remainder
1529	if (Remainder) {
1530	for (unsigned i = `0`; i < rhsWords; ++i)
1531	Remainder[i] = Make_64(High: R[i`2`+`1`], Low: R[i`2`]);
1532	}
1533
1534	// Clean up the memory we allocated.
1535	if (U != &SPACE[`0`]) {
1536	delete [] U;
1537	delete [] V;
1538	delete [] Q;
1539	delete [] R;
1540	}
1541	}
1542
1543	APInt APInt::udiv(const APInt &RHS) const {
1544	assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1545
1546	// First, deal with the easy case
1547	if (isSingleWord()) {
1548	assert(RHS.U.VAL != `0` && "Divide by zero?");
1549	return APInt (BitWidth, U.VAL / RHS.U.VAL);
1550	}
1551
1552	// Get some facts about the LHS and RHS number of bits and words
1553	unsigned lhsWords = getNumWords(BitWidth: getActiveBits());
1554	unsigned rhsBits = RHS.getActiveBits();
1555	unsigned rhsWords = getNumWords(BitWidth: rhsBits);
1556	assert(rhsWords && "Divided by zero???");
1557
1558	// Deal with some degenerate cases
1559	if (!lhsWords)
1560	// 0 / X ===> 0
1561	return APInt (BitWidth, `0`);
1562	if (rhsBits == `1`)
1563	// X / 1 ===> X
1564	return *this;
1565	if (lhsWords < rhsWords \|\| this->ult(RHS))
1566	// X / Y ===> 0, iff X < Y
1567	return APInt (BitWidth, `0`);
1568	if (*this == RHS)
1569	// X / X ===> 1
1570	return APInt (BitWidth, `1`);
1571	if (lhsWords == `1`) // rhsWords is 1 if lhsWords is 1.
1572	// All high words are zero, just use native divide
1573	return APInt (BitWidth, this->U.pVal[`0`] / RHS.U.pVal[`0`]);
1574
1575	// We have to compute it the hard way. Invoke the Knuth divide algorithm.
1576	APInt Quotient(BitWidth, `0`); // to hold result.
1577	divide(LHS: U.pVal, lhsWords, RHS: RHS.U.pVal, rhsWords, Quotient: Quotient.U.pVal, Remainder: nullptr);
1578	return Quotient;
1579	}
1580
1581	APInt APInt::udiv(uint64_t RHS) const {
1582	assert(RHS != `0` && "Divide by zero?");
1583
1584	// First, deal with the easy case
1585	if (isSingleWord())
1586	return APInt (BitWidth, U.VAL / RHS);
1587
1588	// Get some facts about the LHS words.
1589	unsigned lhsWords = getNumWords(BitWidth: getActiveBits());
1590
1591	// Deal with some degenerate cases
1592	if (!lhsWords)
1593	// 0 / X ===> 0
1594	return APInt (BitWidth, `0`);
1595	if (RHS == `1`)
1596	// X / 1 ===> X
1597	return *this;
1598	if (this->ult(RHS))
1599	// X / Y ===> 0, iff X < Y
1600	return APInt (BitWidth, `0`);
1601	if (*this == RHS)
1602	// X / X ===> 1
1603	return APInt (BitWidth, `1`);
1604	if (lhsWords == `1`) // rhsWords is 1 if lhsWords is 1.
1605	// All high words are zero, just use native divide
1606	return APInt (BitWidth, this->U.pVal[`0`] / RHS);
1607
1608	// We have to compute it the hard way. Invoke the Knuth divide algorithm.
1609	APInt Quotient(BitWidth, `0`); // to hold result.
1610	divide(LHS: U.pVal, lhsWords, RHS: &RHS, rhsWords: `1`, Quotient: Quotient.U.pVal, Remainder: nullptr);
1611	return Quotient;
1612	}
1613
1614	APInt APInt::sdiv(const APInt &RHS) const {
1615	if (isNegative()) {
1616	if (RHS.isNegative())
1617	return (-(*this)).udiv(RHS: -RHS);
1618	return -((-(*this)).udiv(RHS));
1619	}
1620	if (RHS.isNegative())
1621	return -(this->udiv(RHS: -RHS));
1622	return this->udiv(RHS);
1623	}
1624
1625	APInt APInt::sdiv(int64_t RHS) const {
1626	if (isNegative()) {
1627	if (RHS < `0`)
1628	return (-(*this)).udiv(RHS: -RHS);
1629	return -((-(*this)).udiv(RHS));
1630	}
1631	if (RHS < `0`)
1632	return -(this->udiv(RHS: -RHS));
1633	return this->udiv(RHS);
1634	}
1635
1636	APInt APInt::urem(const APInt &RHS) const {
1637	assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1638	if (isSingleWord()) {
1639	assert(RHS.U.VAL != `0` && "Remainder by zero?");
1640	return APInt (BitWidth, U.VAL % RHS.U.VAL);
1641	}
1642
1643	// Get some facts about the LHS
1644	unsigned lhsWords = getNumWords(BitWidth: getActiveBits());
1645
1646	// Get some facts about the RHS
1647	unsigned rhsBits = RHS.getActiveBits();
1648	unsigned rhsWords = getNumWords(BitWidth: rhsBits);
1649	assert(rhsWords && "Performing remainder operation by zero ???");
1650
1651	// Check the degenerate cases
1652	if (lhsWords == `0`)
1653	// 0 % Y ===> 0
1654	return APInt (BitWidth, `0`);
1655	if (rhsBits == `1`)
1656	// X % 1 ===> 0
1657	return APInt (BitWidth, `0`);
1658	if (lhsWords < rhsWords \|\| this->ult(RHS))
1659	// X % Y ===> X, iff X < Y
1660	return *this;
1661	if (*this == RHS)
1662	// X % X == 0;
1663	return APInt (BitWidth, `0`);
1664	if (lhsWords == `1`)
1665	// All high words are zero, just use native remainder
1666	return APInt (BitWidth, U.pVal[`0`] % RHS.U.pVal[`0`]);
1667
1668	// We have to compute it the hard way. Invoke the Knuth divide algorithm.
1669	APInt Remainder(BitWidth, `0`);
1670	divide(LHS: U.pVal, lhsWords, RHS: RHS.U.pVal, rhsWords, Quotient: nullptr, Remainder: Remainder.U.pVal);
1671	return Remainder;
1672	}
1673
1674	uint64_t APInt::urem(uint64_t RHS) const {
1675	assert(RHS != `0` && "Remainder by zero?");
1676
1677	if (isSingleWord())
1678	return U.VAL % RHS;
1679
1680	// Get some facts about the LHS
1681	unsigned lhsWords = getNumWords(BitWidth: getActiveBits());
1682
1683	// Check the degenerate cases
1684	if (lhsWords == `0`)
1685	// 0 % Y ===> 0
1686	return `0`;
1687	if (RHS == `1`)
1688	// X % 1 ===> 0
1689	return `0`;
1690	if (this->ult(RHS))
1691	// X % Y ===> X, iff X < Y
1692	return getZExtValue();
1693	if (*this == RHS)
1694	// X % X == 0;
1695	return `0`;
1696	if (lhsWords == `1`)
1697	// All high words are zero, just use native remainder
1698	return U.pVal[`0`] % RHS;
1699
1700	// We have to compute it the hard way. Invoke the Knuth divide algorithm.
1701	uint64_t Remainder;
1702	divide(LHS: U.pVal, lhsWords, RHS: &RHS, rhsWords: `1`, Quotient: nullptr, Remainder: &Remainder);
1703	return Remainder;
1704	}
1705
1706	APInt APInt::srem(const APInt &RHS) const {
1707	if (isNegative()) {
1708	if (RHS.isNegative())
1709	return -((-(*this)).urem(RHS: -RHS));
1710	return -((-(*this)).urem(RHS));
1711	}
1712	if (RHS.isNegative())
1713	return this->urem(RHS: -RHS);
1714	return this->urem(RHS);
1715	}
1716
1717	int64_t APInt::srem(int64_t RHS) const {
1718	if (isNegative()) {
1719	if (RHS < `0`)
1720	return -((-(*this)).urem(RHS: -RHS));
1721	return -((-(*this)).urem(RHS));
1722	}
1723	if (RHS < `0`)
1724	return this->urem(RHS: -RHS);
1725	return this->urem(RHS);
1726	}
1727
1728	void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1729	APInt &Quotient, APInt &Remainder) {
1730	assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1731	unsigned BitWidth = LHS.BitWidth;
1732
1733	// First, deal with the easy case
1734	if (LHS.isSingleWord()) {
1735	assert(RHS.U.VAL != `0` && "Divide by zero?");
1736	uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1737	uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1738	Quotient = APInt (BitWidth, QuotVal);
1739	Remainder = APInt (BitWidth, RemVal);
1740	return;
1741	}
1742
1743	// Get some size facts about the dividend and divisor
1744	unsigned lhsWords = getNumWords(BitWidth: LHS.getActiveBits());
1745	unsigned rhsBits = RHS.getActiveBits();
1746	unsigned rhsWords = getNumWords(BitWidth: rhsBits);
1747	assert(rhsWords && "Performing divrem operation by zero ???");
1748
1749	// Check the degenerate cases
1750	if (lhsWords == `0`) {
1751	Quotient = APInt (BitWidth, `0`); // 0 / Y ===> 0
1752	Remainder = APInt (BitWidth, `0`); // 0 % Y ===> 0
1753	return;
1754	}
1755
1756	if (rhsBits == `1`) {
1757	Quotient = LHS; // X / 1 ===> X
1758	Remainder = APInt (BitWidth, `0`); // X % 1 ===> 0
1759	}
1760
1761	if (lhsWords < rhsWords \|\| LHS.ult(RHS)) {
1762	Remainder = LHS; // X % Y ===> X, iff X < Y
1763	Quotient = APInt (BitWidth, `0`); // X / Y ===> 0, iff X < Y
1764	return;
1765	}
1766
1767	if (LHS == RHS) {
1768	Quotient = APInt (BitWidth, `1`); // X / X ===> 1
1769	Remainder = APInt (BitWidth, `0`); // X % X ===> 0;
1770	return;
1771	}
1772
1773	// Make sure there is enough space to hold the results.
1774	// NOTE: This assumes that reallocate won't affect any bits if it doesn't
1775	// change the size. This is necessary if Quotient or Remainder is aliased
1776	// with LHS or RHS.
1777	Quotient.reallocate(NewBitWidth: BitWidth);
1778	Remainder.reallocate(NewBitWidth: BitWidth);
1779
1780	if (lhsWords == `1`) { // rhsWords is 1 if lhsWords is 1.
1781	// There is only one word to consider so use the native versions.
1782	uint64_t lhsValue = LHS.U.pVal[`0`];
1783	uint64_t rhsValue = RHS.U.pVal[`0`];
1784	Quotient = lhsValue / rhsValue;
1785	Remainder = lhsValue % rhsValue;
1786	return;
1787	}
1788
1789	// Okay, lets do it the long way
1790	divide(LHS: LHS.U.pVal, lhsWords, RHS: RHS.U.pVal, rhsWords, Quotient: Quotient.U.pVal,
1791	Remainder: Remainder.U.pVal);
1792	// Clear the rest of the Quotient and Remainder.
1793	std::memset(s: Quotient.U.pVal + lhsWords, c: `0`,
1794	n: (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1795	std::memset(s: Remainder.U.pVal + rhsWords, c: `0`,
1796	n: (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1797	}
1798
1799	void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1800	uint64_t &Remainder) {
1801	assert(RHS != `0` && "Divide by zero?");
1802	unsigned BitWidth = LHS.BitWidth;
1803
1804	// First, deal with the easy case
1805	if (LHS.isSingleWord()) {
1806	uint64_t QuotVal = LHS.U.VAL / RHS;
1807	Remainder = LHS.U.VAL % RHS;
1808	Quotient = APInt (BitWidth, QuotVal);
1809	return;
1810	}
1811
1812	// Get some size facts about the dividend and divisor
1813	unsigned lhsWords = getNumWords(BitWidth: LHS.getActiveBits());
1814
1815	// Check the degenerate cases
1816	if (lhsWords == `0`) {
1817	Quotient = APInt (BitWidth, `0`); // 0 / Y ===> 0
1818	Remainder = `0`; // 0 % Y ===> 0
1819	return;
1820	}
1821
1822	if (RHS == `1`) {
1823	Quotient = LHS; // X / 1 ===> X
1824	Remainder = `0`; // X % 1 ===> 0
1825	return;
1826	}
1827
1828	if (LHS.ult(RHS)) {
1829	Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
1830	Quotient = APInt (BitWidth, `0`); // X / Y ===> 0, iff X < Y
1831	return;
1832	}
1833
1834	if (LHS == RHS) {
1835	Quotient = APInt (BitWidth, `1`); // X / X ===> 1
1836	Remainder = `0`; // X % X ===> 0;
1837	return;
1838	}
1839
1840	// Make sure there is enough space to hold the results.
1841	// NOTE: This assumes that reallocate won't affect any bits if it doesn't
1842	// change the size. This is necessary if Quotient is aliased with LHS.
1843	Quotient.reallocate(NewBitWidth: BitWidth);
1844
1845	if (lhsWords == `1`) { // rhsWords is 1 if lhsWords is 1.
1846	// There is only one word to consider so use the native versions.
1847	uint64_t lhsValue = LHS.U.pVal[`0`];
1848	Quotient = lhsValue / RHS;
1849	Remainder = lhsValue % RHS;
1850	return;
1851	}
1852
1853	// Okay, lets do it the long way
1854	divide(LHS: LHS.U.pVal, lhsWords, RHS: &RHS, rhsWords: `1`, Quotient: Quotient.U.pVal, Remainder: &Remainder);
1855	// Clear the rest of the Quotient.
1856	std::memset(s: Quotient.U.pVal + lhsWords, c: `0`,
1857	n: (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1858	}
1859
1860	void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1861	APInt &Quotient, APInt &Remainder) {
1862	if (LHS.isNegative()) {
1863	if (RHS.isNegative())
1864	APInt::udivrem(LHS: -LHS, RHS: -RHS, Quotient, Remainder);
1865	else {
1866	APInt::udivrem(LHS: -LHS, RHS, Quotient, Remainder);
1867	Quotient.negate();
1868	}
1869	Remainder.negate();
1870	} else if (RHS.isNegative()) {
1871	APInt::udivrem(LHS, RHS: -RHS, Quotient, Remainder);
1872	Quotient.negate();
1873	} else {
1874	APInt::udivrem(LHS, RHS, Quotient, Remainder);
1875	}
1876	}
1877
1878	void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1879	APInt &Quotient, int64_t &Remainder) {
1880	uint64_t R = Remainder;
1881	if (LHS.isNegative()) {
1882	if (RHS < `0`)
1883	APInt::udivrem(LHS: -LHS, RHS: -RHS, Quotient, Remainder&: R);
1884	else {
1885	APInt::udivrem(LHS: -LHS, RHS, Quotient, Remainder&: R);
1886	Quotient.negate();
1887	}
1888	R = -R;
1889	} else if (RHS < `0`) {
1890	APInt::udivrem(LHS, RHS: -RHS, Quotient, Remainder&: R);
1891	Quotient.negate();
1892	} else {
1893	APInt::udivrem(LHS, RHS, Quotient, Remainder&: R);
1894	}
1895	Remainder = R;
1896	}
1897
1898	APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1899	APInt Res = *this+RHS;
1900	Overflow = isNonNegative() == RHS.isNonNegative() &&
1901	Res.isNonNegative() != isNonNegative();
1902	return Res;
1903	}
1904
1905	APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1906	APInt Res = *this+RHS;
1907	Overflow = Res.ult(RHS);
1908	return Res;
1909	}
1910
1911	APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1912	APInt Res = *this - RHS;
1913	Overflow = isNonNegative() != RHS.isNonNegative() &&
1914	Res.isNonNegative() != isNonNegative();
1915	return Res;
1916	}
1917
1918	APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1919	APInt Res = *this-RHS;
1920	Overflow = Res.ugt(RHS: *this);
1921	return Res;
1922	}
1923
1924	APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1925	// MININT/-1 --> overflow.
1926	Overflow = isMinSignedValue() && RHS.isAllOnes();
1927	return sdiv(RHS);
1928	}
1929
1930	APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1931	APInt Res = *this * RHS;
1932
1933	if (RHS != `0`)
1934	Overflow = Res.sdiv(RHS) != *this \|\|
1935	(isMinSignedValue() && RHS.isAllOnes());
1936	else
1937	Overflow = false;
1938	return Res;
1939	}
1940
1941	APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1942	if (countl_zero() + RHS.countl_zero() + `2` <= BitWidth) {
1943	Overflow = true;
1944	return *this * RHS;
1945	}
1946
1947	APInt Res = lshr(shiftAmt: `1`) * RHS;
1948	Overflow = Res.isNegative();
1949	Res <<= `1`;
1950	if ((*this)[`0`]) {
1951	Res += RHS;
1952	if (Res.ult(RHS))
1953	Overflow = true;
1954	}
1955	return Res;
1956	}
1957
1958	APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1959	return sshl_ov(Amt: ShAmt.getLimitedValue(Limit: getBitWidth()), Overflow);
1960	}
1961
1962	APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
1963	Overflow = ShAmt >= getBitWidth();
1964	if (Overflow)
1965	return APInt (BitWidth, `0`);
1966
1967	if (isNonNegative()) // Don't allow sign change.
1968	Overflow = ShAmt >= countl_zero();
1969	else
1970	Overflow = ShAmt >= countl_one();
1971
1972	return *this << ShAmt;
1973	}
1974
1975	APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1976	return ushl_ov(Amt: ShAmt.getLimitedValue(Limit: getBitWidth()), Overflow);
1977	}
1978
1979	APInt APInt::ushl_ov(unsigned ShAmt, bool &Overflow) const {
1980	Overflow = ShAmt >= getBitWidth();
1981	if (Overflow)
1982	return APInt (BitWidth, `0`);
1983
1984	Overflow = ShAmt > countl_zero();
1985
1986	return *this << ShAmt;
1987	}
1988
1989	APInt APInt::sfloordiv_ov(const APInt &RHS, bool &Overflow) const {
1990	APInt quotient = sdiv_ov(RHS, Overflow);
1991	if ((quotient * RHS != *this) && (isNegative() != RHS.isNegative()))
1992	return quotient - `1`;
1993	return quotient;
1994	}
1995
1996	APInt APInt::sadd_sat(const APInt &RHS) const {
1997	bool Overflow;
1998	APInt Res = sadd_ov(RHS, Overflow);
1999	if (!Overflow)
2000	return Res;
2001
2002	return isNegative() ? APInt::getSignedMinValue(numBits: BitWidth)
2003	: APInt::getSignedMaxValue(numBits: BitWidth);
2004	}
2005
2006	APInt APInt::uadd_sat(const APInt &RHS) const {
2007	bool Overflow;
2008	APInt Res = uadd_ov(RHS, Overflow);
2009	if (!Overflow)
2010	return Res;
2011
2012	return APInt::getMaxValue(numBits: BitWidth);
2013	}
2014
2015	APInt APInt::ssub_sat(const APInt &RHS) const {
2016	bool Overflow;
2017	APInt Res = ssub_ov(RHS, Overflow);
2018	if (!Overflow)
2019	return Res;
2020
2021	return isNegative() ? APInt::getSignedMinValue(numBits: BitWidth)
2022	: APInt::getSignedMaxValue(numBits: BitWidth);
2023	}
2024
2025	APInt APInt::usub_sat(const APInt &RHS) const {
2026	bool Overflow;
2027	APInt Res = usub_ov(RHS, Overflow);
2028	if (!Overflow)
2029	return Res;
2030
2031	return APInt (BitWidth, `0`);
2032	}
2033
2034	APInt APInt::smul_sat(const APInt &RHS) const {
2035	bool Overflow;
2036	APInt Res = smul_ov(RHS, Overflow);
2037	if (!Overflow)
2038	return Res;
2039
2040	// The result is negative if one and only one of inputs is negative.
2041	bool ResIsNegative = isNegative() ^ RHS.isNegative();
2042
2043	return ResIsNegative ? APInt::getSignedMinValue(numBits: BitWidth)
2044	: APInt::getSignedMaxValue(numBits: BitWidth);
2045	}
2046
2047	APInt APInt::umul_sat(const APInt &RHS) const {
2048	bool Overflow;
2049	APInt Res = umul_ov(RHS, Overflow);
2050	if (!Overflow)
2051	return Res;
2052
2053	return APInt::getMaxValue(numBits: BitWidth);
2054	}
2055
2056	APInt APInt::sshl_sat(const APInt &RHS) const {
2057	return sshl_sat(RHS: RHS.getLimitedValue(Limit: getBitWidth()));
2058	}
2059
2060	APInt APInt::sshl_sat(unsigned RHS) const {
2061	bool Overflow;
2062	APInt Res = sshl_ov(ShAmt: RHS, Overflow);
2063	if (!Overflow)
2064	return Res;
2065
2066	return isNegative() ? APInt::getSignedMinValue(numBits: BitWidth)
2067	: APInt::getSignedMaxValue(numBits: BitWidth);
2068	}
2069
2070	APInt APInt::ushl_sat(const APInt &RHS) const {
2071	return ushl_sat(RHS: RHS.getLimitedValue(Limit: getBitWidth()));
2072	}
2073
2074	APInt APInt::ushl_sat(unsigned RHS) const {
2075	bool Overflow;
2076	APInt Res = ushl_ov(ShAmt: RHS, Overflow);
2077	if (!Overflow)
2078	return Res;
2079
2080	return APInt::getMaxValue(numBits: BitWidth);
2081	}
2082
2083	void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2084	// Check our assumptions here
2085	assert(!str.empty() && "Invalid string length");
2086	assert((radix == `10` \|\| radix == `8` \|\| radix == `16` \|\| radix == `2` \|\|
2087	radix == `36`) &&
2088	"Radix should be 2, 8, 10, 16, or 36!");
2089
2090	StringRef::iterator p = str.begin();
2091	size_t slen = str.size();
2092	bool isNeg = *p == `'-'`;
2093	if (p == `'-'` \|\| p == `'+'`) {
2094	p++;
2095	slen--;
2096	assert(slen && "String is only a sign, needs a value.");
2097	}
2098	assert((slen <= numbits \|\| radix != `2`) && "Insufficient bit width");
2099	assert(((slen-`1`)*`3` <= numbits \|\| radix != `8`) && "Insufficient bit width");
2100	assert(((slen-`1`)*`4` <= numbits \|\| radix != `16`) && "Insufficient bit width");
2101	assert((((slen-`1`)*`64`)/`22` <= numbits \|\| radix != `10`) &&
2102	"Insufficient bit width");
2103
2104	// Allocate memory if needed
2105	if (isSingleWord())
2106	U.VAL = `0`;
2107	else
2108	U.pVal = getClearedMemory(numWords: getNumWords());
2109
2110	// Figure out if we can shift instead of multiply
2111	unsigned shift = (radix == `16` ? `4` : radix == `8` ? `3` : radix == `2` ? `1` : `0`);
2112
2113	// Enter digit traversal loop
2114	for (StringRef::iterator e = str.end(); p != e; ++p) {
2115	unsigned digit = getDigit(cdigit: *p, radix);
2116	assert(digit < radix && "Invalid character in digit string");
2117
2118	// Shift or multiply the value by the radix
2119	if (slen > `1`) {
2120	if (shift)
2121	*this <<= shift;
2122	else
2123	*this *= radix;
2124	}
2125
2126	// Add in the digit we just interpreted
2127	*this += digit;
2128	}
2129	// If its negative, put it in two's complement form
2130	if (isNeg)
2131	this->negate();
2132	}
2133
2134	void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
2135	bool formatAsCLiteral, bool UpperCase,
2136	bool InsertSeparators) const {
2137	assert((Radix == `10` \|\| Radix == `8` \|\| Radix == `16` \|\| Radix == `2` \|\|
2138	Radix == `36`) &&
2139	"Radix should be 2, 8, 10, 16, or 36!");
2140
2141	const char *Prefix = "";
2142	if (formatAsCLiteral) {
2143	switch (Radix) {
2144	case `2`:
2145	// Binary literals are a non-standard extension added in gcc 4.3:
2146	// http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2147	Prefix = "0b";
2148	break;
2149	case `8`:
2150	Prefix = "0";
2151	break;
2152	case `10`:
2153	break; // No prefix
2154	case `16`:
2155	Prefix = "0x";
2156	break;
2157	default:
2158	llvm_unreachable("Invalid radix!");
2159	}
2160	}
2161
2162	// Number of digits in a group between separators.
2163	unsigned Grouping = (Radix == `8` \|\| Radix == `10`) ? `3` : `4`;
2164
2165	// First, check for a zero value and just short circuit the logic below.
2166	if (isZero()) {
2167	while (*Prefix) {
2168	Str.push_back(Elt: *Prefix);
2169	++Prefix;
2170	};
2171	Str.push_back(Elt: `'0'`);
2172	return;
2173	}
2174
2175	static const char BothDigits[] = "0123456789abcdefghijklmnopqrstuvwxyz"
2176	"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2177	const char *Digits = BothDigits + (UpperCase ? `36` : `0`);
2178
2179	if (isSingleWord()) {
2180	char Buffer[`65`];
2181	char *BufPtr = std::end(arr&: Buffer);
2182
2183	uint64_t N;
2184	if (!Signed) {
2185	N = getZExtValue();
2186	} else {
2187	int64_t I = getSExtValue();
2188	if (I >= `0`) {
2189	N = I;
2190	} else {
2191	Str.push_back(Elt: `'-'`);
2192	N = -(uint64_t)I;
2193	}
2194	}
2195
2196	while (*Prefix) {
2197	Str.push_back(Elt: *Prefix);
2198	++Prefix;
2199	};
2200
2201	int Pos = `0`;
2202	while (N) {
2203	if (InsertSeparators && Pos % Grouping == `0` && Pos > `0`)
2204	*--BufPtr = `'\''`;
2205	*--BufPtr = Digits[N % Radix];
2206	N /= Radix;
2207	Pos++;
2208	}
2209	Str.append(in_start: BufPtr, in_end: std::end(arr&: Buffer));
2210	return;
2211	}
2212
2213	APInt Tmp(*this);
2214
2215	if (Signed && isNegative()) {
2216	// They want to print the signed version and it is a negative value
2217	// Flip the bits and add one to turn it into the equivalent positive
2218	// value and put a '-' in the result.
2219	Tmp.negate();
2220	Str.push_back(Elt: `'-'`);
2221	}
2222
2223	while (*Prefix) {
2224	Str.push_back(Elt: *Prefix);
2225	++Prefix;
2226	};
2227
2228	// We insert the digits backward, then reverse them to get the right order.
2229	unsigned StartDig = Str.size();
2230
2231	// For the 2, 8 and 16 bit cases, we can just shift instead of divide
2232	// because the number of bits per digit (1, 3 and 4 respectively) divides
2233	// equally. We just shift until the value is zero.
2234	if (Radix == `2` \|\| Radix == `8` \|\| Radix == `16`) {
2235	// Just shift tmp right for each digit width until it becomes zero
2236	unsigned ShiftAmt = (Radix == `16` ? `4` : (Radix == `8` ? `3` : `1`));
2237	unsigned MaskAmt = Radix - `1`;
2238
2239	int Pos = `0`;
2240	while (Tmp.getBoolValue()) {
2241	unsigned Digit = unsigned(Tmp.getRawData()[`0`]) & MaskAmt;
2242	if (InsertSeparators && Pos % Grouping == `0` && Pos > `0`)
2243	Str.push_back(Elt: `'\''`);
2244
2245	Str.push_back(Elt: Digits[Digit]);
2246	Tmp.lshrInPlace(ShiftAmt);
2247	Pos++;
2248	}
2249	} else {
2250	int Pos = `0`;
2251	while (Tmp.getBoolValue()) {
2252	uint64_t Digit;
2253	udivrem(LHS: Tmp, RHS: Radix, Quotient&: Tmp, Remainder&: Digit);
2254	assert(Digit < Radix && "divide failed");
2255	if (InsertSeparators && Pos % Grouping == `0` && Pos > `0`)
2256	Str.push_back(Elt: `'\''`);
2257
2258	Str.push_back(Elt: Digits[Digit]);
2259	Pos++;
2260	}
2261	}
2262
2263	// Reverse the digits before returning.
2264	std::reverse(first: Str.begin()+StartDig, last: Str.end());
2265	}
2266
2267	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
2268	LLVM_DUMP_METHOD void APInt::dump() const {
2269	SmallString<`40`> S, U;
2270	this->toStringUnsigned(U);
2271	this->toStringSigned(S);
2272	dbgs() << "APInt(" << BitWidth << "b, "
2273	<< U << "u " << S << "s)\n";
2274	}
2275	#endif
2276
2277	void APInt::print(raw_ostream &OS, bool isSigned) const {
2278	SmallString<`40`> S;
2279	this->toString(Str&: S, Radix: `10`, Signed: isSigned, / formatAsCLiteral = /false);
2280	OS << S;
2281	}
2282
2283	// This implements a variety of operations on a representation of
2284	// arbitrary precision, two's-complement, bignum integer values.
2285
2286	// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2287	// and unrestricting assumption.
2288	static_assert(APInt::APINT_BITS_PER_WORD % `2` == `0`,
2289	"Part width must be divisible by 2!");
2290
2291	// Returns the integer part with the least significant BITS set.
2292	// BITS cannot be zero.
2293	static inline APInt::WordType lowBitMask(unsigned bits) {
2294	assert(bits != `0` && bits <= APInt::APINT_BITS_PER_WORD);
2295	return ~(APInt::WordType) `0` >> (APInt::APINT_BITS_PER_WORD - bits);
2296	}
2297
2298	/// Returns the value of the lower half of PART.
2299	static inline APInt::WordType lowHalf(APInt::WordType part) {
2300	return part & lowBitMask(bits: APInt::APINT_BITS_PER_WORD / `2`);
2301	}
2302
2303	/// Returns the value of the upper half of PART.
2304	static inline APInt::WordType highHalf(APInt::WordType part) {
2305	return part >> (APInt::APINT_BITS_PER_WORD / `2`);
2306	}
2307
2308	/// Sets the least significant part of a bignum to the input value, and zeroes
2309	/// out higher parts.
2310	void APInt::tcSet(WordType dst, WordType part, unsigned* parts) {
2311	assert(parts > `0`);
2312	dst[`0`] = part;
2313	for (unsigned i = `1`; i < parts; i++)
2314	dst[i] = `0`;
2315	}
2316
2317	/// Assign one bignum to another.
2318	void APInt::tcAssign(WordType dst, const* WordType src, unsigned* parts) {
2319	for (unsigned i = `0`; i < parts; i++)
2320	dst[i] = src[i];
2321	}
2322
2323	/// Returns true if a bignum is zero, false otherwise.
2324	bool APInt::tcIsZero(const WordType src, unsigned* parts) {
2325	for (unsigned i = `0`; i < parts; i++)
2326	if (src[i])
2327	return false;
2328
2329	return true;
2330	}
2331
2332	/// Extract the given bit of a bignum; returns 0 or 1.
2333	int APInt::tcExtractBit(const WordType parts, unsigned* bit) {
2334	return (parts[whichWord(bitPosition: bit)] & maskBit(bitPosition: bit)) != `0`;
2335	}
2336
2337	/// Set the given bit of a bignum.
2338	void APInt::tcSetBit(WordType parts, unsigned* bit) {
2339	parts[whichWord(bitPosition: bit)] \|= maskBit(bitPosition: bit);
2340	}
2341
2342	/// Clears the given bit of a bignum.
2343	void APInt::tcClearBit(WordType parts, unsigned* bit) {
2344	parts[whichWord(bitPosition: bit)] &= ~maskBit(bitPosition: bit);
2345	}
2346
2347	/// Returns the bit number of the least significant set bit of a number. If the
2348	/// input number has no bits set UINT_MAX is returned.
2349	unsigned APInt::tcLSB(const WordType parts, unsigned* n) {
2350	for (unsigned i = `0`; i < n; i++) {
2351	if (parts[i] != `0`) {
2352	unsigned lsb = llvm::countr_zero(Val: parts[i]);
2353	return lsb + i * APINT_BITS_PER_WORD;
2354	}
2355	}
2356
2357	return UINT_MAX;
2358	}
2359
2360	/// Returns the bit number of the most significant set bit of a number.
2361	/// If the input number has no bits set UINT_MAX is returned.
2362	unsigned APInt::tcMSB(const WordType parts, unsigned* n) {
2363	do {
2364	--n;
2365
2366	if (parts[n] != `0`) {
2367	static_assert(sizeof(parts[n]) <= sizeof(uint64_t));
2368	unsigned msb = llvm::Log2_64(Value: parts[n]);
2369
2370	return msb + n * APINT_BITS_PER_WORD;
2371	}
2372	} while (n);
2373
2374	return UINT_MAX;
2375	}
2376
2377	/// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
2378	/// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
2379	/// significant bit of DST. All high bits above srcBITS in DST are zero-filled.
2380	/// /*
2381	void
2382	APInt::tcExtract(WordType dst, unsigned* dstCount, const WordType *src,
2383	unsigned srcBits, unsigned srcLSB) {
2384	unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - `1`) / APINT_BITS_PER_WORD;
2385	assert(dstParts <= dstCount);
2386
2387	unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2388	tcAssign(dst, src: src + firstSrcPart, parts: dstParts);
2389
2390	unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2391	tcShiftRight(dst, Words: dstParts, Count: shift);
2392
2393	// We now have (dstParts APINT_BITS_PER_WORD - shift) bits from SRC*
2394	// in DST. If this is less that srcBits, append the rest, else
2395	// clear the high bits.
2396	unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2397	if (n < srcBits) {
2398	WordType mask = lowBitMask (bits: srcBits - n);
2399	dst[dstParts - `1`] \|= ((src[firstSrcPart + dstParts] & mask)
2400	<< n % APINT_BITS_PER_WORD);
2401	} else if (n > srcBits) {
2402	if (srcBits % APINT_BITS_PER_WORD)
2403	dst[dstParts - `1`] &= lowBitMask (bits: srcBits % APINT_BITS_PER_WORD);
2404	}
2405
2406	// Clear high parts.
2407	while (dstParts < dstCount)
2408	dst[dstParts++] = `0`;
2409	}
2410
2411	//// DST += RHS + C where C is zero or one. Returns the carry flag.
2412	APInt::WordType APInt::tcAdd(WordType dst, const* WordType *rhs,
2413	WordType c, unsigned parts) {
2414	assert(c <= `1`);
2415
2416	for (unsigned i = `0`; i < parts; i++) {
2417	WordType l = dst[i];
2418	if (c) {
2419	dst[i] += rhs[i] + `1`;
2420	c = (dst[i] <= l);
2421	} else {
2422	dst[i] += rhs[i];
2423	c = (dst[i] < l);
2424	}
2425	}
2426
2427	return c;
2428	}
2429
2430	/// This function adds a single "word" integer, src, to the multiple
2431	/// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2432	/// 1 is returned if there is a carry out, otherwise 0 is returned.
2433	/// @returns the carry of the addition.
2434	APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2435	unsigned parts) {
2436	for (unsigned i = `0`; i < parts; ++i) {
2437	dst[i] += src;
2438	if (dst[i] >= src)
2439	return `0`; // No need to carry so exit early.
2440	src = `1`; // Carry one to next digit.
2441	}
2442
2443	return `1`;
2444	}
2445
2446	/// DST -= RHS + C where C is zero or one. Returns the carry flag.
2447	APInt::WordType APInt::tcSubtract(WordType dst, const* WordType *rhs,
2448	WordType c, unsigned parts) {
2449	assert(c <= `1`);
2450
2451	for (unsigned i = `0`; i < parts; i++) {
2452	WordType l = dst[i];
2453	if (c) {
2454	dst[i] -= rhs[i] + `1`;
2455	c = (dst[i] >= l);
2456	} else {
2457	dst[i] -= rhs[i];
2458	c = (dst[i] > l);
2459	}
2460	}
2461
2462	return c;
2463	}
2464
2465	/// This function subtracts a single "word" (64-bit word), src, from
2466	/// the multi-word integer array, dst[], propagating the borrowed 1 value until
2467	/// no further borrowing is needed or it runs out of "words" in dst. The result
2468	/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2469	/// exhausted. In other words, if src > dst then this function returns 1,
2470	/// otherwise 0.
2471	/// @returns the borrow out of the subtraction
2472	APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2473	unsigned parts) {
2474	for (unsigned i = `0`; i < parts; ++i) {
2475	WordType Dst = dst[i];
2476	dst[i] -= src;
2477	if (src <= Dst)
2478	return `0`; // No need to borrow so exit early.
2479	src = `1`; // We have to "borrow 1" from next "word"
2480	}
2481
2482	return `1`;
2483	}
2484
2485	/// Negate a bignum in-place.
2486	void APInt::tcNegate(WordType dst, unsigned* parts) {
2487	tcComplement(dst, parts);
2488	tcIncrement(dst, parts);
2489	}
2490
2491	/// DST += SRC MULTIPLIER + CARRY if add is true*
2492	/// DST = SRC MULTIPLIER + CARRY if add is false*
2493	/// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2494	/// they must start at the same point, i.e. DST == SRC.
2495	/// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2496	/// returned. Otherwise DST is filled with the least significant
2497	/// DSTPARTS parts of the result, and if all of the omitted higher
2498	/// parts were zero return zero, otherwise overflow occurred and
2499	/// return one.
2500	int APInt::tcMultiplyPart(WordType dst, const* WordType *src,
2501	WordType multiplier, WordType carry,
2502	unsigned srcParts, unsigned dstParts,
2503	bool add) {
2504	// Otherwise our writes of DST kill our later reads of SRC.
2505	assert(dst <= src \|\| dst >= src + srcParts);
2506	assert(dstParts <= srcParts + `1`);
2507
2508	// N loops; minimum of dstParts and srcParts.
2509	unsigned n = std::min(a: dstParts, b: srcParts);
2510
2511	for (unsigned i = `0`; i < n; i++) {
2512	// [LOW, HIGH] = MULTIPLIER SRC[i] + DST[i] + CARRY.*
2513	// This cannot overflow, because:
2514	// (n - 1) (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)*
2515	// which is less than n^2.
2516	WordType srcPart = src[i];
2517	WordType low, mid, high;
2518	if (multiplier == `0` \|\| srcPart == `0`) {
2519	low = carry;
2520	high = `0`;
2521	} else {
2522	low = lowHalf(part: srcPart) * lowHalf(part: multiplier);
2523	high = highHalf(part: srcPart) * highHalf(part: multiplier);
2524
2525	mid = lowHalf(part: srcPart) * highHalf(part: multiplier);
2526	high += highHalf(part: mid);
2527	mid <<= APINT_BITS_PER_WORD / `2`;
2528	if (low + mid < low)
2529	high++;
2530	low += mid;
2531
2532	mid = highHalf(part: srcPart) * lowHalf(part: multiplier);
2533	high += highHalf(part: mid);
2534	mid <<= APINT_BITS_PER_WORD / `2`;
2535	if (low + mid < low)
2536	high++;
2537	low += mid;
2538
2539	// Now add carry.
2540	if (low + carry < low)
2541	high++;
2542	low += carry;
2543	}
2544
2545	if (add) {
2546	// And now DST[i], and store the new low part there.
2547	if (low + dst[i] < low)
2548	high++;
2549	dst[i] += low;
2550	} else
2551	dst[i] = low;
2552
2553	carry = high;
2554	}
2555
2556	if (srcParts < dstParts) {
2557	// Full multiplication, there is no overflow.
2558	assert(srcParts + `1` == dstParts);
2559	dst[srcParts] = carry;
2560	return `0`;
2561	}
2562
2563	// We overflowed if there is carry.
2564	if (carry)
2565	return `1`;
2566
2567	// We would overflow if any significant unwritten parts would be
2568	// non-zero. This is true if any remaining src parts are non-zero
2569	// and the multiplier is non-zero.
2570	if (multiplier)
2571	for (unsigned i = dstParts; i < srcParts; i++)
2572	if (src[i])
2573	return `1`;
2574
2575	// We fitted in the narrow destination.
2576	return `0`;
2577	}
2578
2579	/// DST = LHS RHS, where DST has the same width as the operands and*
2580	/// is filled with the least significant parts of the result. Returns
2581	/// one if overflow occurred, otherwise zero. DST must be disjoint
2582	/// from both operands.
2583	int APInt::tcMultiply(WordType dst, const* WordType *lhs,
2584	const WordType rhs, unsigned* parts) {
2585	assert(dst != lhs && dst != rhs);
2586
2587	int overflow = `0`;
2588
2589	for (unsigned i = `0`; i < parts; i++) {
2590	// Don't accumulate on the first iteration so we don't need to initalize
2591	// dst to 0.
2592	overflow \|=
2593	tcMultiplyPart(dst: &dst[i], src: lhs, multiplier: rhs[i], carry: `0`, srcParts: parts, dstParts: parts - i, add: i != `0`);
2594	}
2595
2596	return overflow;
2597	}
2598
2599	/// DST = LHS RHS, where DST has width the sum of the widths of the*
2600	/// operands. No overflow occurs. DST must be disjoint from both operands.
2601	void APInt::tcFullMultiply(WordType dst, const* WordType *lhs,
2602	const WordType rhs, unsigned* lhsParts,
2603	unsigned rhsParts) {
2604	// Put the narrower number on the LHS for less loops below.
2605	if (lhsParts > rhsParts)
2606	return tcFullMultiply (dst, lhs: rhs, rhs: lhs, lhsParts: rhsParts, rhsParts: lhsParts);
2607
2608	assert(dst != lhs && dst != rhs);
2609
2610	for (unsigned i = `0`; i < lhsParts; i++) {
2611	// Don't accumulate on the first iteration so we don't need to initalize
2612	// dst to 0.
2613	tcMultiplyPart(dst: &dst[i], src: rhs, multiplier: lhs[i], carry: `0`, srcParts: rhsParts, dstParts: rhsParts + `1`, add: i != `0`);
2614	}
2615	}
2616
2617	// If RHS is zero LHS and REMAINDER are left unchanged, return one.
2618	// Otherwise set LHS to LHS / RHS with the fractional part discarded,
2619	// set REMAINDER to the remainder, return zero. i.e.
2620	//
2621	// OLD_LHS = RHS LHS + REMAINDER*
2622	//
2623	// SCRATCH is a bignum of the same size as the operands and result for
2624	// use by the routine; its contents need not be initialized and are
2625	// destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2626	int APInt::tcDivide(WordType lhs, const* WordType *rhs,
2627	WordType remainder, WordType srhs,
2628	unsigned parts) {
2629	assert(lhs != remainder && lhs != srhs && remainder != srhs);
2630
2631	unsigned shiftCount = tcMSB(parts: rhs, n: parts) + `1`;
2632	if (shiftCount == `0`)
2633	return true;
2634
2635	shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2636	unsigned n = shiftCount / APINT_BITS_PER_WORD;
2637	WordType mask = (WordType) `1` << (shiftCount % APINT_BITS_PER_WORD);
2638
2639	tcAssign(dst: srhs, src: rhs, parts);
2640	tcShiftLeft(srhs, Words: parts, Count: shiftCount);
2641	tcAssign(dst: remainder, src: lhs, parts);
2642	tcSet(dst: lhs, part: `0`, parts);
2643
2644	// Loop, subtracting SRHS if REMAINDER is greater and adding that to the
2645	// total.
2646	for (;;) {
2647	int compare = tcCompare(remainder, srhs, parts);
2648	if (compare >= `0`) {
2649	tcSubtract(dst: remainder, rhs: srhs, c: `0`, parts);
2650	lhs[n] \|= mask;
2651	}
2652
2653	if (shiftCount == `0`)
2654	break;
2655	shiftCount--;
2656	tcShiftRight(srhs, Words: parts, Count: `1`);
2657	if ((mask >>= `1`) == `0`) {
2658	mask = (WordType) `1` << (APINT_BITS_PER_WORD - `1`);
2659	n--;
2660	}
2661	}
2662
2663	return false;
2664	}
2665
2666	/// Shift a bignum left Count bits in-place. Shifted in bits are zero. There are
2667	/// no restrictions on Count.
2668	void APInt::tcShiftLeft(WordType Dst, unsigned* Words, unsigned Count) {
2669	// Don't bother performing a no-op shift.
2670	if (!Count)
2671	return;
2672
2673	// WordShift is the inter-part shift; BitShift is the intra-part shift.
2674	unsigned WordShift = std::min(a: Count / APINT_BITS_PER_WORD, b: Words);
2675	unsigned BitShift = Count % APINT_BITS_PER_WORD;
2676
2677	// Fastpath for moving by whole words.
2678	if (BitShift == `0`) {
2679	std::memmove(dest: Dst + WordShift, src: Dst, n: (Words - WordShift) * APINT_WORD_SIZE);
2680	} else {
2681	while (Words-- > WordShift) {
2682	Dst[Words] = Dst[Words - WordShift] << BitShift;
2683	if (Words > WordShift)
2684	Dst[Words] \|=
2685	Dst[Words - WordShift - `1`] >> (APINT_BITS_PER_WORD - BitShift);
2686	}
2687	}
2688
2689	// Fill in the remainder with 0s.
2690	std::memset(s: Dst, c: `0`, n: WordShift * APINT_WORD_SIZE);
2691	}
2692
2693	/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2694	/// are no restrictions on Count.
2695	void APInt::tcShiftRight(WordType Dst, unsigned* Words, unsigned Count) {
2696	// Don't bother performing a no-op shift.
2697	if (!Count)
2698	return;
2699
2700	// WordShift is the inter-part shift; BitShift is the intra-part shift.
2701	unsigned WordShift = std::min(a: Count / APINT_BITS_PER_WORD, b: Words);
2702	unsigned BitShift = Count % APINT_BITS_PER_WORD;
2703
2704	unsigned WordsToMove = Words - WordShift;
2705	// Fastpath for moving by whole words.
2706	if (BitShift == `0`) {
2707	std::memmove(dest: Dst, src: Dst + WordShift, n: WordsToMove * APINT_WORD_SIZE);
2708	} else {
2709	for (unsigned i = `0`; i != WordsToMove; ++i) {
2710	Dst[i] = Dst[i + WordShift] >> BitShift;
2711	if (i + `1` != WordsToMove)
2712	Dst[i] \|= Dst[i + WordShift + `1`] << (APINT_BITS_PER_WORD - BitShift);
2713	}
2714	}
2715
2716	// Fill in the remainder with 0s.
2717	std::memset(s: Dst + WordsToMove, c: `0`, n: WordShift * APINT_WORD_SIZE);
2718	}
2719
2720	// Comparison (unsigned) of two bignums.
2721	int APInt::tcCompare(const WordType lhs, const* WordType *rhs,
2722	unsigned parts) {
2723	while (parts) {
2724	parts--;
2725	if (lhs[parts] != rhs[parts])
2726	return (lhs[parts] > rhs[parts]) ? `1` : -`1`;
2727	}
2728
2729	return `0`;
2730	}
2731
2732	APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2733	APInt::Rounding RM) {
2734	// Currently udivrem always rounds down.
2735	switch (RM) {
2736	case APInt::Rounding::DOWN:
2737	case APInt::Rounding::TOWARD_ZERO:
2738	return A.udiv(RHS: B);
2739	case APInt::Rounding::UP: {
2740	APInt Quo, Rem;
2741	APInt::udivrem(LHS: A, RHS: B, Quotient&: Quo, Remainder&: Rem);
2742	if (Rem.isZero())
2743	return Quo;
2744	return Quo + `1`;
2745	}
2746	}
2747	llvm_unreachable("Unknown APInt::Rounding enum");
2748	}
2749
2750	APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2751	APInt::Rounding RM) {
2752	switch (RM) {
2753	case APInt::Rounding::DOWN:
2754	case APInt::Rounding::UP: {
2755	APInt Quo, Rem;
2756	APInt::sdivrem(LHS: A, RHS: B, Quotient&: Quo, Remainder&: Rem);
2757	if (Rem.isZero())
2758	return Quo;
2759	// This algorithm deals with arbitrary rounding mode used by sdivrem.
2760	// We want to check whether the non-integer part of the mathematical value
2761	// is negative or not. If the non-integer part is negative, we need to round
2762	// down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2763	// already rounded down.
2764	if (RM == APInt::Rounding::DOWN) {
2765	if (Rem.isNegative() != B.isNegative())
2766	return Quo - `1`;
2767	return Quo;
2768	}
2769	if (Rem.isNegative() != B.isNegative())
2770	return Quo;
2771	return Quo + `1`;
2772	}
2773	// Currently sdiv rounds towards zero.
2774	case APInt::Rounding::TOWARD_ZERO:
2775	return A.sdiv(RHS: B);
2776	}
2777	llvm_unreachable("Unknown APInt::Rounding enum");
2778	}
2779
2780	std::optional<APInt>
2781	llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2782	unsigned RangeWidth) {
2783	unsigned CoeffWidth = A.getBitWidth();
2784	assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2785	assert(RangeWidth <= CoeffWidth &&
2786	"Value range width should be less than coefficient width");
2787	assert(RangeWidth > `1` && "Value range bit width should be > 1");
2788
2789	LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2790	<< "x + " << C << ", rw:" << RangeWidth << `'\n'`);
2791
2792	// Identify 0 as a (non)solution immediately.
2793	if (C.sextOrTrunc(width: RangeWidth).isZero()) {
2794	LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2795	return APInt (CoeffWidth, `0`);
2796	}
2797
2798	// The result of APInt arithmetic has the same bit width as the operands,
2799	// so it can actually lose high bits. A product of two n-bit integers needs
2800	// 2n-1 bits to represent the full value.
2801	// The operation done below (on quadratic coefficients) that can produce
2802	// the largest value is the evaluation of the equation during bisection,
2803	// which needs 3 times the bitwidth of the coefficient, so the total number
2804	// of required bits is 3n.
2805	//
2806	// The purpose of this extension is to simulate the set Z of all integers,
2807	// where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2808	// and negative numbers (not so much in a modulo arithmetic). The method
2809	// used to solve the equation is based on the standard formula for real
2810	// numbers, and uses the concepts of "positive" and "negative" with their
2811	// usual meanings.
2812	CoeffWidth *= `3`;
2813	A = A.sext(Width: CoeffWidth);
2814	B = B.sext(Width: CoeffWidth);
2815	C = C.sext(Width: CoeffWidth);
2816
2817	// Make A > 0 for simplicity. Negate cannot overflow at this point because
2818	// the bit width has increased.
2819	if (A.isNegative()) {
2820	A.negate();
2821	B.negate();
2822	C.negate();
2823	}
2824
2825	// Solving an equation q(x) = 0 with coefficients in modular arithmetic
2826	// is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2827	// and R = 2^BitWidth.
2828	// Since we're trying not only to find exact solutions, but also values
2829	// that "wrap around", such a set will always have a solution, i.e. an x
2830	// that satisfies at least one of the equations, or such that \|q(x)\|
2831	// exceeds kR, while \|q(x-1)\| for the same k does not.
2832	//
2833	// We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2834	// positive solution n (in the above sense), and also such that the n
2835	// will be the least among all solutions corresponding to k = 0, 1, ...
2836	// (more precisely, the least element in the set
2837	// { n(k) \| k is such that a solution n(k) exists }).
2838	//
2839	// Consider the parabola (over real numbers) that corresponds to the
2840	// quadratic equation. Since A > 0, the arms of the parabola will point
2841	// up. Picking different values of k will shift it up and down by R.
2842	//
2843	// We want to shift the parabola in such a way as to reduce the problem
2844	// of solving q(x) = kR to solving shifted_q(x) = 0.
2845	// (The interesting solutions are the ceilings of the real number
2846	// solutions.)
2847	APInt R = APInt::getOneBitSet(numBits: CoeffWidth, BitNo: RangeWidth);
2848	APInt TwoA = `2` * A;
2849	APInt SqrB = B * B;
2850	bool PickLow;
2851
2852	auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2853	assert(A.isStrictlyPositive());
2854	APInt T = V.abs().urem(RHS: A);
2855	if (T.isZero())
2856	return V;
2857	return V.isNegative() ? V +T : V +(A -T);
2858	};
2859
2860	// The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2861	// iff B is positive.
2862	if (B.isNonNegative()) {
2863	// If B >= 0, the vertex it at a negative location (or at 0), so in
2864	// order to have a non-negative solution we need to pick k that makes
2865	// C-kR negative. To satisfy all the requirements for the solution
2866	// that we are looking for, it needs to be closest to 0 of all k.
2867	C = C.srem(RHS: R);
2868	if (C.isStrictlyPositive())
2869	C -= R;
2870	// Pick the greater solution.
2871	PickLow = false;
2872	} else {
2873	// If B < 0, the vertex is at a positive location. For any solution
2874	// to exist, the discriminant must be non-negative. This means that
2875	// C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2876	// lower bound on values of k: kR >= C - B^2/4A.
2877	APInt LowkR = C - SqrB.udiv(RHS: `2`TwoA); // udiv because all values > 0.*
2878	// Round LowkR up (towards +inf) to the nearest kR.
2879	LowkR = RoundUp (LowkR, R);
2880
2881	// If there exists k meeting the condition above, and such that
2882	// C-kR > 0, there will be two positive real number solutions of
2883	// q(x) = kR. Out of all such values of k, pick the one that makes
2884	// C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2885	// In other words, find maximum k such that LowkR <= kR < C.
2886	if (C.sgt(RHS: LowkR)) {
2887	// If LowkR < C, then such a k is guaranteed to exist because
2888	// LowkR itself is a multiple of R.
2889	C -= -RoundUp (-C, R); // C = C - RoundDown(C, R)
2890	// Pick the smaller solution.
2891	PickLow = true;
2892	} else {
2893	// If C-kR < 0 for all potential k's, it means that one solution
2894	// will be negative, while the other will be positive. The positive
2895	// solution will shift towards 0 if the parabola is moved up.
2896	// Pick the kR closest to the lower bound (i.e. make C-kR closest
2897	// to 0, or in other words, out of all parabolas that have solutions,
2898	// pick the one that is the farthest "up").
2899	// Since LowkR is itself a multiple of R, simply take C-LowkR.
2900	C -= LowkR;
2901	// Pick the greater solution.
2902	PickLow = false;
2903	}
2904	}
2905
2906	LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2907	<< B << "x + " << C << ", rw:" << RangeWidth << `'\n'`);
2908
2909	APInt D = SqrB - `4`A C;
2910	assert(D.isNonNegative() && "Negative discriminant");
2911	APInt SQ = D.sqrt();
2912
2913	APInt Q = SQ * SQ;
2914	bool InexactSQ = Q != D;
2915	// The calculated SQ may actually be greater than the exact (non-integer)
2916	// value. If that's the case, decrement SQ to get a value that is lower.
2917	if (Q.sgt(RHS: D))
2918	SQ -= `1`;
2919
2920	APInt X;
2921	APInt Rem;
2922
2923	// SQ is rounded down (i.e SQ SQ <= D), so the roots may be inexact.*
2924	// When using the quadratic formula directly, the calculated low root
2925	// may be greater than the exact one, since we would be subtracting SQ.
2926	// To make sure that the calculated root is not greater than the exact
2927	// one, subtract SQ+1 when calculating the low root (for inexact value
2928	// of SQ).
2929	if (PickLow)
2930	APInt::sdivrem(LHS: -B - (SQ +InexactSQ), RHS: TwoA, Quotient&: X, Remainder&: Rem);
2931	else
2932	APInt::sdivrem(LHS: -B + SQ, RHS: TwoA, Quotient&: X, Remainder&: Rem);
2933
2934	// The updated coefficients should be such that the (exact) solution is
2935	// positive. Since APInt division rounds towards 0, the calculated one
2936	// can be 0, but cannot be negative.
2937	assert(X.isNonNegative() && "Solution should be non-negative");
2938
2939	if (!InexactSQ && Rem.isZero()) {
2940	LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << `'\n'`);
2941	return X;
2942	}
2943
2944	assert((SQSQ).sle(D) && "SQ = \|_sqrt(D)_\|, so SQSQ <= D");
2945	// The exact value of the square root of D should be between SQ and SQ+1.
2946	// This implies that the solution should be between that corresponding to
2947	// SQ (i.e. X) and that corresponding to SQ+1.
2948	//
2949	// The calculated X cannot be greater than the exact (real) solution.
2950	// Actually it must be strictly less than the exact solution, while
2951	// X+1 will be greater than or equal to it.
2952
2953	APInt VX = (A X + B)X + C;
2954	APInt VY = VX + TwoA *X + A + B;
2955	bool SignChange =
2956	VX.isNegative() != VY.isNegative() \|\| VX.isZero() != VY.isZero();
2957	// If the sign did not change between X and X+1, X is not a valid solution.
2958	// This could happen when the actual (exact) roots don't have an integer
2959	// between them, so they would both be contained between X and X+1.
2960	if (!SignChange) {
2961	LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2962	return std::nullopt;
2963	}
2964
2965	X += `1`;
2966	LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << `'\n'`);
2967	return X;
2968	}
2969
2970	std::optional<unsigned>
2971	llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
2972	assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
2973	if (A == B)
2974	return std::nullopt;
2975	return A.getBitWidth() - ((A ^ B).countl_zero() + `1`);
2976	}
2977
2978	APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth,
2979	bool MatchAllBits) {
2980	unsigned OldBitWidth = A.getBitWidth();
2981	assert((((OldBitWidth % NewBitWidth) == `0`) \|\|
2982	((NewBitWidth % OldBitWidth) == `0`)) &&
2983	"One size should be a multiple of the other one. "
2984	"Can't do fractional scaling.");
2985
2986	// Check for matching bitwidths.
2987	if (OldBitWidth == NewBitWidth)
2988	return A;
2989
2990	APInt NewA = APInt::getZero(numBits: NewBitWidth);
2991
2992	// Check for null input.
2993	if (A.isZero())
2994	return NewA;
2995
2996	if (NewBitWidth > OldBitWidth) {
2997	// Repeat bits.
2998	unsigned Scale = NewBitWidth / OldBitWidth;
2999	for (unsigned i = `0`; i != OldBitWidth; ++i)
3000	if (A [i])
3001	NewA.setBits(loBit: i * Scale, hiBit: (i + `1`) * Scale);
3002	} else {
3003	unsigned Scale = OldBitWidth / NewBitWidth;
3004	for (unsigned i = `0`; i != NewBitWidth; ++i) {
3005	if (MatchAllBits) {
3006	if (A.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3007	NewA.setBit(i);
3008	} else {
3009	if (!A.extractBits(numBits: Scale, bitPosition: i * Scale).isZero())
3010	NewA.setBit(i);
3011	}
3012	}
3013	}
3014
3015	return NewA;
3016	}
3017
3018	/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3019	/// with the integer held in IntVal.
3020	void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3021	unsigned StoreBytes) {
3022	assert((IntVal.getBitWidth()+`7`)/`8` >= StoreBytes && "Integer too small!");
3023	const uint8_t Src = (const* uint8_t *)IntVal.getRawData();
3024
3025	if (sys::IsLittleEndianHost) {
3026	// Little-endian host - the source is ordered from LSB to MSB. Order the
3027	// destination from LSB to MSB: Do a straight copy.
3028	memcpy(dest: Dst, src: Src, n: StoreBytes);
3029	} else {
3030	// Big-endian host - the source is an array of 64 bit words ordered from
3031	// LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
3032	// from MSB to LSB: Reverse the word order, but not the bytes in a word.
3033	while (StoreBytes > sizeof(uint64_t)) {
3034	StoreBytes -= sizeof(uint64_t);
3035	// May not be aligned so use memcpy.
3036	memcpy(dest: Dst + StoreBytes, src: Src, n: sizeof(uint64_t));
3037	Src += sizeof(uint64_t);
3038	}
3039
3040	memcpy(dest: Dst, src: Src + sizeof(uint64_t) - StoreBytes, n: StoreBytes);
3041	}
3042	}
3043
3044	/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3045	/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3046	void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
3047	unsigned LoadBytes) {
3048	assert((IntVal.getBitWidth()+`7`)/`8` >= LoadBytes && "Integer too small!");
3049	uint8_t Dst = reinterpret_cast<uint8_t >(
3050	const_cast<uint64_t *>(IntVal.getRawData()));
3051
3052	if (sys::IsLittleEndianHost)
3053	// Little-endian host - the destination must be ordered from LSB to MSB.
3054	// The source is ordered from LSB to MSB: Do a straight copy.
3055	memcpy(dest: Dst, src: Src, n: LoadBytes);
3056	else {
3057	// Big-endian - the destination is an array of 64 bit words ordered from
3058	// LSW to MSW. Each word must be ordered from MSB to LSB. The source is
3059	// ordered from MSB to LSB: Reverse the word order, but not the bytes in
3060	// a word.
3061	while (LoadBytes > sizeof(uint64_t)) {
3062	LoadBytes -= sizeof(uint64_t);
3063	// May not be aligned so use memcpy.
3064	memcpy(dest: Dst, src: Src + LoadBytes, n: sizeof(uint64_t));
3065	Dst += sizeof(uint64_t);
3066	}
3067
3068	memcpy(dest: Dst + sizeof(uint64_t) - LoadBytes, src: Src, n: LoadBytes);
3069	}
3070	}
3071
3072	APInt APIntOps::avgFloorS(const APInt &C1, const APInt &C2) {
3073	// Return floor((C1 + C2) / 2)
3074	return (C1 & C2) + (C1 ^ C2).ashr(ShiftAmt: `1`);
3075	}
3076
3077	APInt APIntOps::avgFloorU(const APInt &C1, const APInt &C2) {
3078	// Return floor((C1 + C2) / 2)
3079	return (C1 & C2) + (C1 ^ C2).lshr(shiftAmt: `1`);
3080	}
3081
3082	APInt APIntOps::avgCeilS(const APInt &C1, const APInt &C2) {
3083	// Return ceil((C1 + C2) / 2)
3084	return (C1 \| C2) - (C1 ^ C2).ashr(ShiftAmt: `1`);
3085	}
3086
3087	APInt APIntOps::avgCeilU(const APInt &C1, const APInt &C2) {
3088	// Return ceil((C1 + C2) / 2)
3089	return (C1 \| C2) - (C1 ^ C2).lshr(shiftAmt: `1`);
3090	}
3091
3092	APInt APIntOps::mulhs(const APInt &C1, const APInt &C2) {
3093	assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
3094	unsigned FullWidth = C1.getBitWidth() * `2`;
3095	APInt C1Ext = C1.sext(Width: FullWidth);
3096	APInt C2Ext = C2.sext(Width: FullWidth);
3097	return (C1Ext * C2Ext).extractBits(numBits: C1.getBitWidth(), bitPosition: C1.getBitWidth());
3098	}
3099
3100	APInt APIntOps::mulhu(const APInt &C1, const APInt &C2) {
3101	assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
3102	unsigned FullWidth = C1.getBitWidth() * `2`;
3103	APInt C1Ext = C1.zext(width: FullWidth);
3104	APInt C2Ext = C2.zext(width: FullWidth);
3105	return (C1Ext * C2Ext).extractBits(numBits: C1.getBitWidth(), bitPosition: C1.getBitWidth());
3106	}
3107

Browse the source code of llvm_projects/llvm/lib/Support/APInt.cpp