ExpandIRInsts.cpp source code [llvm_projects/llvm/lib/CodeGen/ExpandIRInsts.cpp]

1	//===--- ExpandIRInsts.cpp - Expand IR instructions -----------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	// This pass expands certain instructions at the IR level.
9	//
10	// The following expansions are implemented:
11	// - Expansion of ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp
12	// .. to’ instructions with a bitwidth above a threshold. This is
13	// useful for targets like x86_64 that cannot lower fp convertions
14	// with more than 128 bits.
15	//
16	// - Expansion of ‘frem‘ for types MVT::f16, MVT::f32, and MVT::f64 for
17	// targets which use "Expand" as the legalization action for the
18	// corresponding type.
19	//
20	// - Expansion of ‘udiv‘, ‘sdiv‘, ‘urem‘, and ‘srem‘ instructions with
21	// a bitwidth above a threshold into a call to auto-generated
22	// functions. This is useful for targets like x86_64 that cannot
23	// lower divisions with more than 128 bits or targets like x86_32 that
24	// cannot lower divisions with more than 64 bits.
25	//
26	// Instructions with vector types are scalarized first if their scalar
27	// types can be expanded. Scalable vector types are not supported.
28	//===----------------------------------------------------------------------===//
29
30	#include "llvm/CodeGen/ExpandIRInsts.h"
31	#include "llvm/ADT/SmallVector.h"
32	#include "llvm/Analysis/AssumptionCache.h"
33	#include "llvm/Analysis/GlobalsModRef.h"
34	#include "llvm/Analysis/SimplifyQuery.h"
35	#include "llvm/Analysis/ValueTracking.h"
36	#include "llvm/CodeGen/ISDOpcodes.h"
37	#include "llvm/CodeGen/Passes.h"
38	#include "llvm/CodeGen/TargetLowering.h"
39	#include "llvm/CodeGen/TargetPassConfig.h"
40	#include "llvm/CodeGen/TargetSubtargetInfo.h"
41	#include "llvm/IR/IRBuilder.h"
42	#include "llvm/IR/InstIterator.h"
43	#include "llvm/IR/Instruction.h"
44	#include "llvm/IR/Instructions.h"
45	#include "llvm/IR/IntrinsicInst.h"
46	#include "llvm/IR/MDBuilder.h"
47	#include "llvm/IR/Module.h"
48	#include "llvm/IR/PassManager.h"
49	#include "llvm/IR/ProfDataUtils.h"
50	#include "llvm/InitializePasses.h"
51	#include "llvm/Pass.h"
52	#include "llvm/Support/Casting.h"
53	#include "llvm/Support/CommandLine.h"
54	#include "llvm/Support/ErrorHandling.h"
55	#include "llvm/Target/TargetMachine.h"
56	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
57	#include "llvm/Transforms/Utils/IntegerDivision.h"
58	#include <optional>
59
60	#define DEBUG_TYPE "expand-ir-insts"
61
62	using namespace llvm;
63
64	namespace llvm {
65	extern cl::opt<bool> ProfcheckDisableMetadataFixes;
66	}
67
68	static cl::opt<unsigned>
69	ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
70	cl::init(Val: IntegerType::MAX_INT_BITS),
71	cl::desc("fp convert instructions on integers with "
72	"more than <N> bits are expanded."));
73
74	static cl::opt<unsigned>
75	ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
76	cl::init(Val: IntegerType::MAX_INT_BITS),
77	cl::desc("div and rem instructions on integers with "
78	"more than <N> bits are expanded."));
79
80	static bool isConstantPowerOfTwo(Value V, bool* SignedOp) {
81	auto *C = dyn_cast<ConstantInt>(Val: V);
82	if (!C)
83	return false;
84
85	APInt Val = C->getValue();
86	if (SignedOp && Val.isNegative())
87	Val = -Val;
88	return Val.isPowerOf2();
89	}
90
91	static bool isSigned(unsigned Opcode) {
92	return Opcode == Instruction::SDiv \|\| Opcode == Instruction::SRem;
93	}
94
95	/// For signed div/rem by a power of 2, compute the bias-adjusted dividend:
96	/// Sign = ashr X, (BitWidth - 1) -- 0 or -1
97	/// Bias = lshr Sign, (BitWidth - ShiftAmt) -- 0 or 2^ShiftAmt - 1
98	/// Adjusted = add X, Bias
99	/// The bias adds (2^ShiftAmt - 1) for negative X, correcting rounding towards
100	/// zero (instead of towards -inf that a plain ashr would give).
101	/// The lshr form is used instead of 'and' to avoid large immediate constants.
102	static Value addSignedBias(IRBuilder<> &Builder, Value X, unsigned BitWidth,
103	unsigned ShiftAmt) {
104	assert(ShiftAmt > `0` && ShiftAmt < BitWidth &&
105	"ShiftAmt out of range; callers should handle ShiftAmt == 0");
106	Value *Sign = Builder.CreateAShr(LHS: X, RHS: BitWidth - `1`, Name: "sign");
107	Value *Bias = Builder.CreateLShr(LHS: Sign, RHS: BitWidth - ShiftAmt, Name: "bias");
108	return Builder.CreateAdd(LHS: X, RHS: Bias, Name: "adjusted");
109	}
110
111	/// Expand division or remainder by a power-of-2 constant.
112	/// Division (let C = log2(\|divisor\|)):
113	/// udiv X, 2^C -> lshr X, C
114	/// sdiv X, 2^C -> ashr (add X, Bias), C (Bias corrects rounding)
115	/// sdiv exact X, 2^C -> ashr exact X, C (no bias needed)
116	/// For negative power-of-2 divisors, the division result is negated.
117	/// Remainder (let C = log2(\|divisor\|)):
118	/// urem X, 2^C -> and X, (2^C - 1)
119	/// srem X, 2^C -> sub X, (shl (ashr (add X, Bias), C), C)
120	static void expandPow2DivRem(BinaryOperator *BO) {
121	LLVM_DEBUG(dbgs() << "Expanding instruction: " << *BO << `'\n'`);
122
123	unsigned Opcode = BO->getOpcode();
124	bool IsDiv = (Opcode == Instruction::UDiv \|\| Opcode == Instruction::SDiv);
125	bool IsSigned = isSigned(Opcode);
126	// isExact() is only valid for div.
127	bool IsExact = IsDiv && BO->isExact();
128
129	assert(isConstantPowerOfTwo(BO->getOperand(`1`), IsSigned) &&
130	"Expected power-of-2 constant divisor");
131
132	Value *X = BO->getOperand(i_nocapture: `0`);
133	auto *C = cast<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`));
134	Type *Ty = BO->getType();
135	unsigned BitWidth = Ty->getIntegerBitWidth();
136
137	APInt DivisorVal = C->getValue();
138	bool IsNegativeDivisor = IsSigned && DivisorVal.isNegative();
139	// Use countr_zero() to get the shift amount directly from the bit pattern.
140	// This works correctly for both positive and negative powers of 2, including
141	// INT_MIN, without needing to negate the value first.
142	unsigned ShiftAmt = DivisorVal.countr_zero();
143
144	IRBuilder<> Builder(BO);
145	Value *Result;
146
147	if (ShiftAmt == `0`) {
148	// Div by 1/-1: X / 1 = X, X / -1 = -X.
149	// Rem by 1/-1: always 0.
150	if (IsDiv)
151	Result = IsNegativeDivisor ? Builder.CreateNeg(V: X) : X;
152	else
153	Result = ConstantInt::get(Ty, V: `0`);
154	} else if (IsSigned) {
155	// The signed expansion uses X multiple times (bias computation, shift,
156	// and sub for remainder). Freeze X to ensure consistent behavior if it is
157	// undef/poison. For exact division, no bias is needed and X is used only
158	// once, so freeze is unnecessary.
159	if (!IsExact && !isGuaranteedNotToBeUndefOrPoison(V: X))
160	X = Builder.CreateFreeze(V: X, Name: X->getName() + ".fr");
161	// For exact division, no bias is needed since there's no rounding.
162	Value *Dividend =
163	IsExact ? X : addSignedBias(Builder, X, BitWidth, ShiftAmt);
164	Value *Quotient = Builder.CreateAShr(
165	LHS: Dividend, RHS: ShiftAmt, Name: IsDiv && IsNegativeDivisor ? "pre.neg" : "shifted",
166	isExact: IsExact);
167	if (IsDiv) {
168	Result = IsNegativeDivisor ? Builder.CreateNeg(V: Quotient) : Quotient;
169	} else {
170	// Rem = X - (Quotient << ShiftAmt):
171	// clear lower ShiftAmt bits via round-trip shift, then subtract.
172	Value *Truncated = Builder.CreateShl(LHS: Quotient, RHS: ShiftAmt, Name: "truncated");
173	Result = Builder.CreateSub(LHS: X, RHS: Truncated);
174	}
175	} else {
176	if (IsDiv) {
177	Result = Builder.CreateLShr(LHS: X, RHS: ShiftAmt, Name: "", isExact: IsExact);
178	} else {
179	APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShiftAmt);
180	Result = Builder.CreateAnd(LHS: X, RHS: ConstantInt::get(Ty, V: Mask));
181	}
182	}
183
184	BO->replaceAllUsesWith(V: Result);
185	if (Result != X)
186	if (auto *RI = dyn_cast<Instruction>(Val: Result))
187	RI->takeName(V: BO);
188	BO->dropAllReferences();
189	BO->eraseFromParent();
190	}
191
192	/// This class implements a precise expansion of the frem instruction.
193	/// The generated code is based on the fmod implementation in the AMD device
194	/// libs.
195	namespace {
196	class FRemExpander {
197	/// The IRBuilder to use for the expansion.
198	IRBuilder<> &B;
199
200	/// Floating point type of the return value and the arguments of the FRem
201	/// instructions that should be expanded.
202	Type *FremTy;
203
204	/// Floating point type to use for the computation. This may be
205	/// wider than the \p FremTy.
206	Type *ComputeFpTy;
207
208	/// Integer type used to hold the exponents returned by frexp.
209	Type *ExTy;
210
211	/// How many bits of the quotient to compute per iteration of the
212	/// algorithm, stored as a value of type \p ExTy.
213	Value *Bits;
214
215	/// Constant 1 of type \p ExTy.
216	Value *One;
217
218	/// The frem argument/return types that can be expanded by this class.
219	// TODO: The expansion could work for other floating point types
220	// as well, but this would require additional testing.
221	static constexpr std::array<MVT, `3`> ExpandableTypes{MVT::f16, MVT::f32,
222	MVT::f64};
223
224	public:
225	static bool canExpandType(Type *Ty) {
226	EVT VT = EVT::getEVT(Ty);
227	assert(VT.isSimple() && "Can expand only simple types");
228
229	return is_contained(Range: ExpandableTypes, Element: VT.getSimpleVT());
230	}
231
232	static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
233	assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
234	return TLI.getOperationAction(Op: ISD::FREM, VT) ==
235	TargetLowering::LegalizeAction::Expand;
236	}
237
238	static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
239	// Consider scalar type for simplicity. It seems unlikely that a
240	// vector type can be legalized without expansion if the scalar
241	// type cannot.
242	return shouldExpandFremType(TLI, VT: EVT::getEVT(Ty: Ty->getScalarType()));
243	}
244
245	/// Return true if the pass should expand frem instructions of any type
246	/// for the target represented by \p TLI.
247	static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
248	return any_of(Range: ExpandableTypes,
249	P: [&](MVT V) { return shouldExpandFremType(TLI, VT: EVT (V)); });
250	}
251
252	static FRemExpander create(IRBuilder<> &B, Type *Ty) {
253	assert(canExpandType(Ty) && "Expected supported floating point type");
254
255	// The type to use for the computation of the remainder. This may be
256	// wider than the input/result type which affects the ...
257	Type *ComputeTy = Ty;
258	// ... maximum number of iterations of the remainder computation loop
259	// to use. This value is for the case in which the computation
260	// uses the same input/result type.
261	unsigned MaxIter = `2`;
262
263	if (Ty->isHalfTy()) {
264	// Use the wider type and less iterations.
265	ComputeTy = B.getFloatTy();
266	MaxIter = `1`;
267	}
268
269	unsigned Precision = APFloat::semanticsPrecision(Ty->getFltSemantics());
270	return FRemExpander {B, Ty, Precision / MaxIter, ComputeTy};
271	}
272
273	/// Build the FRem expansion for the numerator \p X and the
274	/// denumerator \p Y. The type of X and Y must match \p FremTy. The
275	/// code will be generated at the insertion point of \p B and the
276	/// insertion point will be reset at exit.
277	Value buildFRem(Value X, Value Y, std::optional<SimplifyQuery> &SQ) const*;
278
279	/// Build an approximate FRem expansion for the numerator \p X and
280	/// the denumerator \p Y at the insertion point of builder \p B.
281	/// The type of X and Y must match \p FremTy.
282	Value buildApproxFRem(Value X, Value Y) const*;
283
284	private:
285	FRemExpander(IRBuilder<> &B, Type FremTy, unsigned* Bits, Type *ComputeFpTy)
286	: B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
287	Bits(ConstantInt::get(Ty: ExTy, V: Bits)), One(ConstantInt::get(Ty: ExTy, V: `1`)) {}
288
289	Value createRcp(Value V, const Twine &Name) const {
290	// Leave it to later optimizations to turn this into an rcp
291	// instruction if available.
292	return B.CreateFDiv(L: ConstantFP::get(Ty: ComputeFpTy, V: `1.0`), R: V, Name);
293	}
294
295	// Helper function to build the UPDATE_AX code which is common to the
296	// loop body and the "final iteration".
297	Value buildUpdateAx(Value Ax, Value Ay, Value Ayinv) const {
298	// Build:
299	// float q = rint(ax ayinv);*
300	// ax = fma(-q, ay, ax);
301	// int clt = ax < 0.0f;
302	// float axp = ax + ay;
303	// ax = clt ? axp : ax;
304	Value *Q = B.CreateUnaryIntrinsic(ID: Intrinsic::rint, Op: B.CreateFMul(L: Ax, R: Ayinv),
305	FMFSource: {}, Name: "q");
306	Value *AxUpdate = B.CreateFMA(Factor1: B.CreateFNeg(V: Q), Factor2: Ay, Summand: Ax, FMFSource: {}, Name: "ax");
307	Value *Clt = B.CreateFCmp(P: CmpInst::FCMP_OLT, LHS: AxUpdate,
308	RHS: ConstantFP::getZero(Ty: ComputeFpTy), Name: "clt");
309	Value *Axp = B.CreateFAdd(L: AxUpdate, R: Ay, Name: "axp");
310	return B.CreateSelect(C: Clt, True: Axp, False: AxUpdate, Name: "ax");
311	}
312
313	/// Build code to extract the exponent and mantissa of \p Src.
314	/// Return the exponent minus one for use as a loop bound and
315	/// the mantissa taken to the given \p NewExp power.
316	std::pair<Value , Value > buildExpAndPower(Value Src, Value NewExp,
317	const Twine &ExName,
318	const Twine &PowName) const {
319	// Build:
320	// ExName = frexp_exp(Src) - 1;
321	// PowName = fldexp(frexp_mant(ExName), NewExp);
322	Type *Ty = Src->getType();
323	Type *ExTy = B.getInt32Ty();
324	Value *Frexp = B.CreateIntrinsic(ID: Intrinsic::frexp, OverloadTypes: {Ty, ExTy}, Args: Src);
325	Value *Mant = B.CreateExtractValue(Agg: Frexp, Idxs: {`0`});
326	Value *Exp = B.CreateExtractValue(Agg: Frexp, Idxs: {`1`});
327
328	Exp = B.CreateSub(LHS: Exp, RHS: One, Name: ExName);
329	Value *Pow = B.CreateLdexp(Src: Mant, Exp: NewExp, FMFSource: {}, Name: PowName);
330
331	return {Pow, Exp};
332	}
333
334	/// Build the main computation of the remainder for the case in which
335	/// Ax > Ay, where Ax = \|X\|, Ay = \|Y\|, and X is the numerator and Y the
336	/// denumerator. Add the incoming edge from the computation result
337	/// to \p RetPhi.
338	void buildRemainderComputation(Value AxInitial, Value AyInitial, Value *X,
339	PHINode RetPhi, FastMathFlags FMF) const* {
340	IRBuilder<>::FastMathFlagGuard Guard(B);
341	B.setFastMathFlags(FMF);
342
343	// Build:
344	// ex = frexp_exp(ax) - 1;
345	// ax = fldexp(frexp_mant(ax), bits);
346	// ey = frexp_exp(ay) - 1;
347	// ay = fledxp(frexp_mant(ay), 1);
348	auto [Ax, Ex] = buildExpAndPower(Src: AxInitial, NewExp: Bits, ExName: "ex", PowName: "ax");
349	auto [Ay, Ey] = buildExpAndPower(Src: AyInitial, NewExp: One, ExName: "ey", PowName: "ay");
350
351	// Build:
352	// int nb = ex - ey;
353	// float ayinv = 1.0/ay;
354	Value *Nb = B.CreateSub(LHS: Ex, RHS: Ey, Name: "nb");
355	Value *Ayinv = createRcp(V: Ay, Name: "ayinv");
356
357	// Build: while (nb > bits)
358	BasicBlock *PreheaderBB = B.GetInsertBlock();
359	Function *Fun = PreheaderBB->getParent();
360	auto *LoopBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.loop_body", Parent: Fun);
361	auto *ExitBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.loop_exit", Parent: Fun);
362
363	B.CreateCondBr(Cond: B.CreateICmp(P: CmpInst::ICMP_SGT, LHS: Nb, RHS: Bits), True: LoopBB, False: ExitBB);
364
365	// Build loop body:
366	// UPDATE_AX
367	// ax = fldexp(ax, bits);
368	// nb -= bits;
369	// One iteration of the loop is factored out. The code shared by
370	// the loop and this "iteration" is denoted by UPDATE_AX.
371	B.SetInsertPoint(LoopBB);
372	PHINode *NbIv = B.CreatePHI(Ty: Nb->getType(), NumReservedValues: `2`, Name: "nb_iv");
373	NbIv->addIncoming(V: Nb, BB: PreheaderBB);
374
375	auto *AxPhi = B.CreatePHI(Ty: ComputeFpTy, NumReservedValues: `2`, Name: "ax_loop_phi");
376	AxPhi->addIncoming(V: Ax, BB: PreheaderBB);
377
378	Value *AxPhiUpdate = buildUpdateAx(Ax: AxPhi, Ay, Ayinv);
379	AxPhiUpdate = B.CreateLdexp(Src: AxPhiUpdate, Exp: Bits, FMFSource: {}, Name: "ax_update");
380	AxPhi->addIncoming(V: AxPhiUpdate, BB: LoopBB);
381	NbIv->addIncoming(V: B.CreateSub(LHS: NbIv, RHS: Bits, Name: "nb_update"), BB: LoopBB);
382
383	B.CreateCondBr(Cond: B.CreateICmp(P: CmpInst::ICMP_SGT, LHS: NbIv, RHS: Bits), True: LoopBB, False: ExitBB);
384
385	// Build final iteration
386	// ax = fldexp(ax, nb - bits + 1);
387	// UPDATE_AX
388	B.SetInsertPoint(ExitBB);
389
390	auto *AxPhiExit = B.CreatePHI(Ty: ComputeFpTy, NumReservedValues: `2`, Name: "ax_exit_phi");
391	AxPhiExit->addIncoming(V: Ax, BB: PreheaderBB);
392	AxPhiExit->addIncoming(V: AxPhi, BB: LoopBB);
393	auto *NbExitPhi = B.CreatePHI(Ty: Nb->getType(), NumReservedValues: `2`, Name: "nb_exit_phi");
394	NbExitPhi->addIncoming(V: NbIv, BB: LoopBB);
395	NbExitPhi->addIncoming(V: Nb, BB: PreheaderBB);
396
397	Value *AxFinal = B.CreateLdexp(
398	Src: AxPhiExit, Exp: B.CreateAdd(LHS: B.CreateSub(LHS: NbExitPhi, RHS: Bits), RHS: One), FMFSource: {}, Name: "ax");
399	AxFinal = buildUpdateAx(Ax: AxFinal, Ay, Ayinv);
400
401	// Build:
402	// ax = fldexp(ax, ey);
403	// ret = copysign(ax,x);
404	AxFinal = B.CreateLdexp(Src: AxFinal, Exp: Ey, FMFSource: {}, Name: "ax");
405	if (ComputeFpTy != FremTy)
406	AxFinal = B.CreateFPTrunc(V: AxFinal, DestTy: FremTy);
407	Value *Ret = B.CreateCopySign(LHS: AxFinal, RHS: X);
408
409	RetPhi->addIncoming(V: Ret, BB: ExitBB);
410	}
411
412	/// Build the else-branch of the conditional in the FRem
413	/// expansion, i.e. the case in wich Ax <= Ay, where Ax = \|X\|, Ay
414	/// = \|Y\|, and X is the numerator and Y the denumerator. Add the
415	/// incoming edge from the result to \p RetPhi.
416	void buildElseBranch(Value Ax, Value Ay, Value X, PHINode RetPhi) const {
417	// Build:
418	// ret = ax == ay ? copysign(0.0f, x) : x;
419	Value *ZeroWithXSign = B.CreateCopySign(LHS: ConstantFP::getZero(Ty: FremTy), RHS: X);
420	Value *Ret = B.CreateSelect(C: B.CreateFCmpOEQ(LHS: Ax, RHS: Ay), True: ZeroWithXSign, False: X);
421
422	RetPhi->addIncoming(V: Ret, BB: B.GetInsertBlock());
423	}
424
425	/// Return a value that is NaN if one of the corner cases concerning
426	/// the inputs \p X and \p Y is detected, and \p Ret otherwise.
427	Value handleInputCornerCases(Value Ret, Value X, Value Y,
428	std::optional<SimplifyQuery> &SQ,
429	bool NoInfs) const {
430	// Build:
431	// ret = (y == 0.0f \|\| isnan(y)) ? QNAN : ret;
432	// ret = isfinite(x) ? ret : QNAN;
433	Value *Nan = ConstantFP::getQNaN(Ty: FremTy);
434	Ret = B.CreateSelect(C: B.CreateFCmpUEQ(LHS: Y, RHS: ConstantFP::getZero(Ty: FremTy)), True: Nan,
435	False: Ret);
436	Value *XFinite =
437	NoInfs \|\| (SQ && isKnownNeverInfinity(V: X, SQ: *SQ))
438	? B.getTrue()
439	: B.CreateFCmpULT(LHS: B.CreateFAbs(V: X), RHS: ConstantFP::getInfinity(Ty: FremTy));
440	Ret = B.CreateSelect(C: XFinite, True: Ret, False: Nan);
441
442	return Ret;
443	}
444	};
445	} // namespace
446
447	Value FRemExpander::buildApproxFRem(Value X, Value Y) const* {
448	IRBuilder<>::FastMathFlagGuard Guard(B);
449	// Propagating the approximate functions flag to the
450	// division leads to an unacceptable drop in precision
451	// on AMDGPU.
452	// TODO Find out if any flags might be worth propagating.
453	B.clearFastMathFlags();
454
455	Value *Quot = B.CreateFDiv(L: X, R: Y);
456	Value *Trunc = B.CreateUnaryIntrinsic(ID: Intrinsic::trunc, Op: Quot, FMFSource: {});
457	Value *Neg = B.CreateFNeg(V: Trunc);
458
459	return B.CreateFMA(Factor1: Neg, Factor2: Y, Summand: X);
460	}
461
462	Value FRemExpander::buildFRem(Value X, Value *Y,
463	std::optional<SimplifyQuery> &SQ) const {
464	assert(X->getType() == FremTy && Y->getType() == FremTy);
465
466	FastMathFlags FMF = B.getFastMathFlags();
467
468	// This function generates the following code structure:
469	// if (abs(x) > abs(y))
470	// { ret = compute remainder }
471	// else
472	// { ret = x or 0 with sign of x }
473	// Adjust ret to NaN/inf in input
474	// return ret
475	Value *Ax = B.CreateFAbs(V: X, FMFSource: {}, Name: "ax");
476	Value *Ay = B.CreateFAbs(V: Y, FMFSource: {}, Name: "ay");
477	if (ComputeFpTy != X->getType()) {
478	Ax = B.CreateFPExt(V: Ax, DestTy: ComputeFpTy, Name: "ax");
479	Ay = B.CreateFPExt(V: Ay, DestTy: ComputeFpTy, Name: "ay");
480	}
481	Value *AxAyCmp = B.CreateFCmpOGT(LHS: Ax, RHS: Ay);
482
483	PHINode *RetPhi = B.CreatePHI(Ty: FremTy, NumReservedValues: `2`, Name: "ret");
484	Value *Ret = RetPhi;
485
486	// We would return NaN in all corner cases handled here.
487	// Hence, if NaNs are excluded, keep the result as it is.
488	if (!FMF.noNaNs())
489	Ret = handleInputCornerCases(Ret, X, Y, SQ, NoInfs: FMF.noInfs());
490
491	Function *Fun = B.GetInsertBlock()->getParent();
492	auto *ThenBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.compute", Parent: Fun);
493	auto *ElseBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.else", Parent: Fun);
494	SplitBlockAndInsertIfThenElse(Cond: AxAyCmp, SplitBefore: RetPhi, ThenBlock: &ThenBB, ElseBlock: &ElseBB);
495
496	auto SavedInsertPt = B.GetInsertPoint();
497
498	// Build remainder computation for "then" branch
499	//
500	// The ordered comparison ensures that ax and ay are not NaNs
501	// in the then-branch. Furthermore, y cannot be an infinity and the
502	// check at the end of the function ensures that the result will not
503	// be used if x is an infinity.
504	FastMathFlags ComputeFMF = FMF;
505	ComputeFMF.setNoInfs();
506	ComputeFMF.setNoNaNs();
507
508	B.SetInsertPoint(ThenBB);
509	buildRemainderComputation(AxInitial: Ax, AyInitial: Ay, X, RetPhi, FMF);
510	B.CreateBr(Dest: RetPhi->getParent());
511
512	// Build "else"-branch
513	B.SetInsertPoint(ElseBB);
514	buildElseBranch(Ax, Ay, X, RetPhi);
515	B.CreateBr(Dest: RetPhi->getParent());
516
517	B.SetInsertPoint(SavedInsertPt);
518
519	return Ret;
520	}
521
522	static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
523	LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << `'\n'`);
524
525	Type *Ty = I.getType();
526	assert(FRemExpander::canExpandType(Ty) &&
527	"Expected supported floating point type");
528
529	FastMathFlags FMF = I.getFastMathFlags();
530	// TODO Make use of those flags for optimization?
531	FMF.setAllowReciprocal(false);
532	FMF.setAllowContract(false);
533
534	IRBuilder<> B(&I);
535	B.setFastMathFlags(FMF);
536	B.SetCurrentDebugLocation(I.getDebugLoc());
537
538	const FRemExpander Expander = FRemExpander::create(B, Ty);
539	Value *Ret = FMF.approxFunc()
540	? Expander.buildApproxFRem(X: I.getOperand(i_nocapture: `0`), Y: I.getOperand(i_nocapture: `1`))
541	: Expander.buildFRem(X: I.getOperand(i_nocapture: `0`), Y: I.getOperand(i_nocapture: `1`), SQ);
542
543	I.replaceAllUsesWith(V: Ret);
544	Ret->takeName(V: &I);
545	I.eraseFromParent();
546
547	return true;
548	}
549	// clang-format off: preserve formatting of the following example
550
551	/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
552	/// the generated code. This currently generates code similarly to compiler-rt's
553	/// implementations.
554	///
555	/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
556	/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
557	/// entry:
558	/// %0 = bitcast float %a to i32
559	/// %conv.i = zext i32 %0 to i64
560	/// %tobool.not = icmp sgt i32 %0, -1
561	/// %conv = select i1 %tobool.not, i64 1, i64 -1
562	/// %and = lshr i64 %conv.i, 23
563	/// %shr = and i64 %and, 255
564	/// %and2 = and i64 %conv.i, 8388607
565	/// %or = or i64 %and2, 8388608
566	/// %cmp = icmp ult i64 %shr, 127
567	/// br i1 %cmp, label %cleanup, label %if.end
568	///
569	/// if.end: ; preds = %entry
570	/// %sub = add nuw nsw i64 %shr, 4294967169
571	/// %conv5 = and i64 %sub, 4294967232
572	/// %cmp6.not = icmp eq i64 %conv5, 0
573	/// br i1 %cmp6.not, label %if.end12, label %if.then8
574	///
575	/// if.then8: ; preds = %if.end
576	/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64
577	/// -9223372036854775808 br label %cleanup
578	///
579	/// if.end12: ; preds = %if.end
580	/// %cmp13 = icmp ult i64 %shr, 150
581	/// br i1 %cmp13, label %if.then15, label %if.else
582	///
583	/// if.then15: ; preds = %if.end12
584	/// %sub16 = sub nuw nsw i64 150, %shr
585	/// %shr17 = lshr i64 %or, %sub16
586	/// %mul = mul nsw i64 %shr17, %conv
587	/// br label %cleanup
588	///
589	/// if.else: ; preds = %if.end12
590	/// %sub18 = add nsw i64 %shr, -150
591	/// %shl = shl i64 %or, %sub18
592	/// %mul19 = mul nsw i64 %shl, %conv
593	/// br label %cleanup
594	///
595	/// cleanup: ; preds = %entry,
596	/// %if.else, %if.then15, %if.then8
597	/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [
598	/// %mul19, %if.else ], [ 0, %entry ] ret i64 %retval.0
599	/// }
600	///
601	/// Replace fp to integer with generated code.
602	static void expandFPToI(Instruction FPToI, bool* IsSaturating, bool IsSigned) {
603	// clang-format on
604	IRBuilder<> Builder(FPToI);
605	auto *FloatVal = FPToI->getOperand(i: `0`);
606	IntegerType *IntTy = cast<IntegerType>(Val: FPToI->getType());
607
608	unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
609	unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - `1`;
610
611	// FIXME: fp16's range is covered by i32. So `fptoi half` can convert
612	// to i32 first following a sext/zext to target integer type.
613	Value A1 = nullptr*;
614	if (FloatVal->getType()->isHalfTy() && BitWidth >= `32`) {
615	if (FPToI->getOpcode() == Instruction::FPToUI) {
616	Value *A0 = Builder.CreateFPToUI(V: FloatVal, DestTy: Builder.getInt32Ty());
617	A1 = Builder.CreateZExt(V: A0, DestTy: IntTy);
618	} else { // FPToSI
619	Value *A0 = Builder.CreateFPToSI(V: FloatVal, DestTy: Builder.getInt32Ty());
620	A1 = Builder.CreateSExt(V: A0, DestTy: IntTy);
621	}
622	FPToI->replaceAllUsesWith(V: A1);
623	FPToI->dropAllReferences();
624	FPToI->eraseFromParent();
625	return;
626	}
627
628	// fp80 conversion is implemented by fpext to fp128 first then do the
629	// conversion.
630	FPMantissaWidth = FPMantissaWidth == `63` ? `112` : FPMantissaWidth;
631	unsigned FloatWidth =
632	PowerOf2Ceil(A: FloatVal->getType()->getScalarSizeInBits());
633	unsigned ExponentWidth = FloatWidth - FPMantissaWidth - `1`;
634	unsigned ExponentBias = (`1` << (ExponentWidth - `1`)) - `1`;
635	IntegerType *FloatIntTy = Builder.getIntNTy(N: FloatWidth);
636	Value *ImplicitBit = ConstantInt::get(
637	Ty: FloatIntTy, V: APInt::getOneBitSet(numBits: FloatWidth, BitNo: FPMantissaWidth));
638	Value *SignificandMask = ConstantInt::get(
639	Ty: FloatIntTy, V: APInt::getLowBitsSet(numBits: FloatWidth, loBitsSet: FPMantissaWidth));
640
641	BasicBlock *Entry = Builder.GetInsertBlock();
642	Function *F = Entry->getParent();
643	Entry->setName(Twine (Entry->getName(), "fp-to-i-entry"));
644	BasicBlock CheckSaturateBB, SaturateBB;
645	BasicBlock *End =
646	Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "fp-to-i-cleanup");
647	if (IsSaturating) {
648	CheckSaturateBB = BasicBlock::Create(Context&: Builder.getContext(),
649	Name: "fp-to-i-if-check.saturate", Parent: F, InsertBefore: End);
650	SaturateBB =
651	BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-saturate", Parent: F, InsertBefore: End);
652	}
653	BasicBlock *CheckExpSizeBB = BasicBlock::Create(
654	Context&: Builder.getContext(), Name: "fp-to-i-if-check.exp.size", Parent: F, InsertBefore: End);
655	BasicBlock *ExpSmallBB =
656	BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-exp.small", Parent: F, InsertBefore: End);
657	BasicBlock *ExpLargeBB =
658	BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-exp.large", Parent: F, InsertBefore: End);
659
660	Entry->getTerminator()->eraseFromParent();
661
662	// entry:
663	Builder.SetInsertPoint(Entry);
664	// We're going to introduce branches on the value, so freeze it.
665	if (!isGuaranteedNotToBeUndefOrPoison(V: FloatVal))
666	FloatVal = Builder.CreateFreeze(V: FloatVal);
667	// fp80 conversion is implemented by fpext to fp128 first then do the
668	// conversion.
669	if (FloatVal->getType()->isX86_FP80Ty())
670	FloatVal =
671	Builder.CreateFPExt(V: FloatVal, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
672	Value *ARep = Builder.CreateBitCast(V: FloatVal, DestTy: FloatIntTy);
673	Value PosOrNeg, Sign;
674	if (IsSigned) {
675	PosOrNeg =
676	Builder.CreateICmpSGT(LHS: ARep, RHS: ConstantInt::getSigned(Ty: FloatIntTy, V: -`1`));
677	Sign = Builder.CreateSelectWithUnknownProfile(
678	C: PosOrNeg, True: ConstantInt::getSigned(Ty: IntTy, V: `1`),
679	False: ConstantInt::getSigned(Ty: IntTy, V: -`1`), PassName: "sign");
680	}
681	Value *And =
682	Builder.CreateLShr(LHS: ARep, RHS: Builder.getIntN(N: FloatWidth, C: FPMantissaWidth));
683	Value *BiasedExp = Builder.CreateAnd(
684	LHS: And, RHS: Builder.getIntN(N: FloatWidth, C: (`1` << ExponentWidth) - `1`), Name: "biased.exp");
685	Value *Abs = Builder.CreateAnd(LHS: ARep, RHS: SignificandMask);
686	Value *Significand = Builder.CreateOr(LHS: Abs, RHS: ImplicitBit, Name: "significand");
687	Value *ZeroResultCond = Builder.CreateICmpULT(
688	LHS: BiasedExp, RHS: Builder.getIntN(N: FloatWidth, C: ExponentBias), Name: "exp.is.negative");
689	if (IsSaturating) {
690	Value *IsNaN = Builder.CreateFCmpUNO(LHS: FloatVal, RHS: FloatVal, Name: "is.nan");
691	ZeroResultCond = Builder.CreateOr(LHS: ZeroResultCond, RHS: IsNaN);
692	if (!IsSigned) {
693	Value *IsNeg = Builder.CreateIsNeg(Arg: ARep);
694	ZeroResultCond = Builder.CreateOr(LHS: ZeroResultCond, RHS: IsNeg);
695	}
696	}
697	Instruction *CondBr = Builder.CreateCondBr(
698	Cond: ZeroResultCond, True: End, False: IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
699	// We do not have any information on the value of the exponent, so mark the
700	// branch weights as unkown.
701	setExplicitlyUnknownBranchWeightsIfProfiled(I&: *CondBr, DEBUG_TYPE, F);
702
703	Value *Saturated;
704	if (IsSaturating) {
705	// check.saturate:
706	Builder.SetInsertPoint(CheckSaturateBB);
707	uint64_t SaturatingBiasedExp =
708	static_cast<uint64_t>(ExponentBias) + BitWidth - IsSigned;
709	// Clamp to the all-ones (inf/NaN) exponent. Without this, when the integer
710	// is wide enough to hold every finite float the threshold exceeds any
711	// possible biased exponent, so +/-inf would never saturate.
712	uint64_t MaxBiasedExp = (`1ULL` << ExponentWidth) - `1`;
713	if (SaturatingBiasedExp > MaxBiasedExp)
714	SaturatingBiasedExp = MaxBiasedExp;
715	Value *Cmp3 = Builder.CreateICmpUGE(
716	LHS: BiasedExp, RHS: ConstantInt::get(Ty: FloatIntTy, V: SaturatingBiasedExp));
717	Value *CondBrSat = Builder.CreateCondBr(Cond: Cmp3, True: SaturateBB, False: CheckExpSizeBB);
718	// Saturation is considered an unlikely event.
719	applyProfMetadataIfEnabled(V: CondBrSat, setMetadataCallback: [&](Instruction *Inst) {
720	Inst->setMetadata(
721	KindID: LLVMContext::MD_prof,
722	Node: MDBuilder (Inst->getContext()).createUnlikelyBranchWeights());
723	});
724
725	// saturate:
726	Builder.SetInsertPoint(SaturateBB);
727	if (IsSigned) {
728	Value *SignedMax =
729	ConstantInt::get(Ty: IntTy, V: APInt::getSignedMaxValue(numBits: BitWidth));
730	Value *SignedMin =
731	ConstantInt::get(Ty: IntTy, V: APInt::getSignedMinValue(numBits: BitWidth));
732	// Select between the signed max and min values for saturation.
733	Saturated = Builder.CreateSelectWithUnknownProfile(
734	C: PosOrNeg, True: SignedMax, False: SignedMin, PassName: "saturated");
735	} else {
736	Saturated = ConstantInt::getAllOnesValue(Ty: IntTy);
737	}
738	Builder.CreateBr(Dest: End);
739	}
740
741	// if.end9:
742	Builder.SetInsertPoint(CheckExpSizeBB);
743	Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
744	LHS: BiasedExp, RHS: Builder.getIntN(N: FloatWidth, C: ExponentBias + FPMantissaWidth),
745	Name: "exp.smaller.mantissa.width");
746	// We cannot determine whether this is a left shift or a right shift,
747	// so we mark the branch weights as unknown.
748	Value *CondBr2 =
749	Builder.CreateCondBr(Cond: ExpSmallerMantissaWidth, True: ExpSmallBB, False: ExpLargeBB);
750	applyProfMetadataIfEnabled(V: CondBr2, setMetadataCallback: [&](Instruction *Inst) {
751	setExplicitlyUnknownBranchWeightsIfProfiled(I&: *Inst, DEBUG_TYPE, F);
752	});
753
754	// exp.small:
755	Builder.SetInsertPoint(ExpSmallBB);
756	Value *Sub13 = Builder.CreateSub(
757	LHS: Builder.getIntN(N: FloatWidth, C: ExponentBias + FPMantissaWidth), RHS: BiasedExp);
758	Value *ExpSmallRes =
759	Builder.CreateZExtOrTrunc(V: Builder.CreateLShr(LHS: Significand, RHS: Sub13), DestTy: IntTy);
760	if (IsSigned)
761	ExpSmallRes = Builder.CreateMul(LHS: ExpSmallRes, RHS: Sign);
762	Builder.CreateBr(Dest: End);
763
764	// exp.large:
765	Builder.SetInsertPoint(ExpLargeBB);
766	Value *Sub15 = Builder.CreateAdd(
767	LHS: BiasedExp,
768	RHS: ConstantInt::getSigned(
769	Ty: FloatIntTy, V: -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
770	Value *SignificandCast = Builder.CreateZExtOrTrunc(V: Significand, DestTy: IntTy);
771	Value *ExpLargeRes = Builder.CreateShl(
772	LHS: SignificandCast, RHS: Builder.CreateZExtOrTrunc(V: Sub15, DestTy: IntTy));
773	if (IsSigned)
774	ExpLargeRes = Builder.CreateMul(LHS: ExpLargeRes, RHS: Sign);
775	Builder.CreateBr(Dest: End);
776
777	// cleanup:
778	Builder.SetInsertPoint(TheBB: End, IP: End->begin());
779	PHINode *Retval0 = Builder.CreatePHI(Ty: FPToI->getType(), NumReservedValues: `3` + IsSaturating);
780
781	if (IsSaturating)
782	Retval0->addIncoming(V: Saturated, BB: SaturateBB);
783	Retval0->addIncoming(V: ExpSmallRes, BB: ExpSmallBB);
784	Retval0->addIncoming(V: ExpLargeRes, BB: ExpLargeBB);
785	Retval0->addIncoming(V: Builder.getIntN(N: BitWidth, C: `0`), BB: Entry);
786
787	FPToI->replaceAllUsesWith(V: Retval0);
788	FPToI->dropAllReferences();
789	FPToI->eraseFromParent();
790	}
791
792	// clang-format off: preserve formatting of the following example
793
794	/// Generate code to convert a fp number to integer, replacing S(U)IToFP with
795	/// the generated code. This currently generates code similarly to compiler-rt's
796	/// implementations. This implementation has an implicit assumption that integer
797	/// width is larger than fp.
798	///
799	/// An example IR generated from compiler-rt/floatdisf.c looks like below:
800	/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
801	/// entry:
802	/// %cmp = icmp eq i64 %a, 0
803	/// br i1 %cmp, label %return, label %if.end
804	///
805	/// if.end: ; preds = %entry
806	/// %shr = ashr i64 %a, 63
807	/// %xor = xor i64 %shr, %a
808	/// %sub = sub nsw i64 %xor, %shr
809	/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
810	/// %cast = trunc i64 %0 to i32
811	/// %sub1 = sub nuw nsw i32 64, %cast
812	/// %sub2 = xor i32 %cast, 63
813	/// %cmp3 = icmp ult i32 %cast, 40
814	/// br i1 %cmp3, label %if.then4, label %if.else
815	///
816	/// if.then4: ; preds = %if.end
817	/// switch i32 %sub1, label %sw.default [
818	/// i32 25, label %sw.bb
819	/// i32 26, label %sw.epilog
820	/// ]
821	///
822	/// sw.bb: ; preds = %if.then4
823	/// %shl = shl i64 %sub, 1
824	/// br label %sw.epilog
825	///
826	/// sw.default: ; preds = %if.then4
827	/// %sub5 = sub nsw i64 38, %0
828	/// %sh_prom = and i64 %sub5, 4294967295
829	/// %shr6 = lshr i64 %sub, %sh_prom
830	/// %shr9 = lshr i64 274877906943, %0
831	/// %and = and i64 %shr9, %sub
832	/// %cmp10 = icmp ne i64 %and, 0
833	/// %conv11 = zext i1 %cmp10 to i64
834	/// %or = or i64 %shr6, %conv11
835	/// br label %sw.epilog
836	///
837	/// sw.epilog: ; preds = %sw.default,
838	/// %if.then4, %sw.bb
839	/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl,
840	/// %sw.bb ] %1 = lshr i64 %a.addr.0, 2 %2 = and i64 %1, 1 %or16 = or i64 %2,
841	/// %a.addr.0 %inc = add nsw i64 %or16, 1 %3 = and i64 %inc, 67108864
842	/// %tobool.not = icmp eq i64 %3, 0
843	/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
844	/// %spec.select = ashr i64 %inc, %spec.select.v
845	/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
846	/// br label %if.end26
847	///
848	/// if.else: ; preds = %if.end
849	/// %sub23 = add nuw nsw i64 %0, 4294967256
850	/// %sh_prom24 = and i64 %sub23, 4294967295
851	/// %shl25 = shl i64 %sub, %sh_prom24
852	/// br label %if.end26
853	///
854	/// if.end26: ; preds = %sw.epilog,
855	/// %if.else
856	/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
857	/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
858	/// %conv27 = trunc i64 %shr to i32
859	/// %and28 = and i32 %conv27, -2147483648
860	/// %add = shl nuw nsw i32 %e.0, 23
861	/// %shl29 = add nuw nsw i32 %add, 1065353216
862	/// %conv31 = trunc i64 %a.addr.1 to i32
863	/// %and32 = and i32 %conv31, 8388607
864	/// %or30 = or i32 %and32, %and28
865	/// %or33 = or i32 %or30, %shl29
866	/// %4 = bitcast i32 %or33 to float
867	/// br label %return
868	///
869	/// return: ; preds = %entry,
870	/// %if.end26
871	/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
872	/// ret float %retval.0
873	/// }
874	///
875	/// Replace integer to fp with generated code.
876	static void expandIToFP(Instruction *IToFP) {
877	// clang-format on
878	IRBuilder<> Builder(IToFP);
879	auto *IntVal = IToFP->getOperand(i: `0`);
880	IntegerType *IntTy = cast<IntegerType>(Val: IntVal->getType());
881
882	unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
883	unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - `1`;
884	// fp80 conversion is implemented by conversion tp fp128 first following
885	// a fptrunc to fp80.
886	FPMantissaWidth = FPMantissaWidth == `63` ? `112` : FPMantissaWidth;
887	// FIXME: As there is no related builtins added in compliler-rt,
888	// here currently utilized the fp32 <-> fp16 lib calls to implement.
889	FPMantissaWidth = FPMantissaWidth == `10` ? `23` : FPMantissaWidth;
890	FPMantissaWidth = FPMantissaWidth == `7` ? `23` : FPMantissaWidth;
891	unsigned FloatWidth = PowerOf2Ceil(A: FPMantissaWidth);
892	bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
893
894	// We're going to introduce branches on the value, so freeze it.
895	if (!isGuaranteedNotToBeUndefOrPoison(V: IntVal))
896	IntVal = Builder.CreateFreeze(V: IntVal);
897
898	// The expansion below assumes that int width >= float width. Zero or sign
899	// extend the integer accordingly.
900	if (BitWidth < FloatWidth) {
901	BitWidth = FloatWidth;
902	IntTy = Builder.getIntNTy(N: BitWidth);
903	IntVal = Builder.CreateIntCast(V: IntVal, DestTy: IntTy, isSigned: IsSigned);
904	}
905
906	Value *Temp1 =
907	Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: `1`),
908	RHS: Builder.getIntN(N: BitWidth, C: FPMantissaWidth + `3`));
909
910	BasicBlock *Entry = Builder.GetInsertBlock();
911	Function *F = Entry->getParent();
912	Entry->setName(Twine (Entry->getName(), "itofp-entry"));
913	BasicBlock *End =
914	Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "itofp-return");
915	BasicBlock *IfEnd =
916	BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end", Parent: F, InsertBefore: End);
917	BasicBlock *IfThen4 =
918	BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then4", Parent: F, InsertBefore: End);
919	BasicBlock *SwBB =
920	BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-bb", Parent: F, InsertBefore: End);
921	BasicBlock *SwDefault =
922	BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-default", Parent: F, InsertBefore: End);
923	BasicBlock *SwEpilog =
924	BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-epilog", Parent: F, InsertBefore: End);
925	BasicBlock *IfThen20 =
926	BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then20", Parent: F, InsertBefore: End);
927	BasicBlock *IfElse =
928	BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-else", Parent: F, InsertBefore: End);
929	BasicBlock *IfEnd26 =
930	BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end26", Parent: F, InsertBefore: End);
931
932	Entry->getTerminator()->eraseFromParent();
933
934	Function *CTLZ =
935	Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: Intrinsic::ctlz, OverloadTys: IntTy);
936	ConstantInt *True = Builder.getTrue();
937
938	// entry:
939	Builder.SetInsertPoint(Entry);
940	// We assume that the zero is an unlikely input case, so the branch to 'End'
941	// is the unlikely path.
942	Value *Cmp = Builder.CreateICmpEQ(LHS: IntVal, RHS: ConstantInt::getSigned(Ty: IntTy, V: `0`));
943	Value *CondBrEntry = Builder.CreateCondBr(Cond: Cmp, True: End, False: IfEnd);
944	applyProfMetadataIfEnabled(V: CondBrEntry, setMetadataCallback: [&](Instruction *Inst) {
945	Inst->setMetadata(
946	KindID: LLVMContext::MD_prof,
947	Node: MDBuilder (Inst->getContext()).createUnlikelyBranchWeights());
948	});
949
950	// if.end:
951	Builder.SetInsertPoint(IfEnd);
952	Value *Shr =
953	Builder.CreateAShr(LHS: IntVal, RHS: Builder.getIntN(N: BitWidth, C: BitWidth - `1`));
954	Value *Xor = Builder.CreateXor(LHS: Shr, RHS: IntVal);
955	Value *Sub = Builder.CreateSub(LHS: Xor, RHS: Shr);
956	Value *Call = Builder.CreateCall(Callee: CTLZ, Args: {IsSigned ? Sub : IntVal, True});
957	Value *Cast = Builder.CreateTrunc(V: Call, DestTy: Builder.getInt32Ty());
958	int BitWidthNew = FloatWidth == `128` ? BitWidth : `32`;
959	Value *Sub1 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth),
960	RHS: FloatWidth == `128` ? Call : Cast);
961	Value *Sub2 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - `1`),
962	RHS: FloatWidth == `128` ? Call : Cast);
963	Value *Cmp3 = Builder.CreateICmpSGT(
964	LHS: Sub1, RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + `1`));
965	// This branch handles the rare case where rounding the mantissa causes a
966	// carry-out at the most significant bit, necessitating an increment of the
967	// exponent. This is rare case, so the True path is mared as likely.
968	Value *CondBrIfEnd = Builder.CreateCondBr(Cond: Cmp3, True: IfThen4, False: IfElse);
969	applyProfMetadataIfEnabled(V: CondBrIfEnd, setMetadataCallback: [&](Instruction *Inst) {
970	Inst->setMetadata(
971	KindID: LLVMContext::MD_prof,
972	Node: MDBuilder (Inst->getContext()).createLikelyBranchWeights());
973	});
974
975	// if.then4:
976	Builder.SetInsertPoint(IfThen4);
977	SwitchInst *SI = Builder.CreateSwitch(V: Sub1, Dest: SwDefault);
978	SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + `2`), Dest: SwBB);
979	SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + `3`), Dest: SwEpilog);
980	// Add branch weights to the SwitchInst. The weights are provided for the
981	// default case first (SwDefault), followed by each explicit case in the
982	// order they were added (SwBB, then SwEpilog). Because the following cases
983	// are rare, the defalut case is given a likely weight.
984	if (!ProfcheckDisableMetadataFixes) {
985	SI->setMetadata(
986	KindID: LLVMContext::MD_prof,
987	Node: MDBuilder (SI->getContext())
988	.createBranchWeights(Weights: {llvm::MDBuilder::kLikelyBranchWeight,
989	llvm::MDBuilder::kUnlikelyBranchWeight,
990	llvm::MDBuilder::kUnlikelyBranchWeight}));
991	}
992
993	// sw.bb:
994	Builder.SetInsertPoint(SwBB);
995	Value *Shl =
996	Builder.CreateShl(LHS: IsSigned ? Sub : IntVal, RHS: Builder.getIntN(N: BitWidth, C: `1`));
997	Builder.CreateBr(Dest: SwEpilog);
998
999	// sw.default:
1000	Builder.SetInsertPoint(SwDefault);
1001	Value *Sub5 = Builder.CreateSub(
1002	LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - FPMantissaWidth - `3`),
1003	RHS: FloatWidth == `128` ? Call : Cast);
1004	Value *ShProm = Builder.CreateZExt(V: Sub5, DestTy: IntTy);
1005	Value *Shr6 = Builder.CreateLShr(LHS: IsSigned ? Sub : IntVal,
1006	RHS: FloatWidth == `128` ? Sub5 : ShProm);
1007	Value *Sub8 =
1008	Builder.CreateAdd(LHS: FloatWidth == `128` ? Call : Cast,
1009	RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + `3`));
1010	Value *ShProm9 = Builder.CreateZExt(V: Sub8, DestTy: IntTy);
1011	Value *Shr9 = Builder.CreateLShr(LHS: ConstantInt::getSigned(Ty: IntTy, V: -`1`),
1012	RHS: FloatWidth == `128` ? Sub8 : ShProm9);
1013	Value *And = Builder.CreateAnd(LHS: Shr9, RHS: IsSigned ? Sub : IntVal);
1014	Value *Cmp10 = Builder.CreateICmpNE(LHS: And, RHS: Builder.getIntN(N: BitWidth, C: `0`));
1015	Value *Conv11 = Builder.CreateZExt(V: Cmp10, DestTy: IntTy);
1016	Value *Or = Builder.CreateOr(LHS: Shr6, RHS: Conv11);
1017	Builder.CreateBr(Dest: SwEpilog);
1018
1019	// sw.epilog:
1020	Builder.SetInsertPoint(SwEpilog);
1021	PHINode *AAddr0 = Builder.CreatePHI(Ty: IntTy, NumReservedValues: `3`);
1022	AAddr0->addIncoming(V: Or, BB: SwDefault);
1023	AAddr0->addIncoming(V: IsSigned ? Sub : IntVal, BB: IfThen4);
1024	AAddr0->addIncoming(V: Shl, BB: SwBB);
1025	Value *A0 = Builder.CreateTrunc(V: AAddr0, DestTy: Builder.getInt32Ty());
1026	Value *A1 = Builder.CreateLShr(LHS: A0, RHS: Builder.getInt32(C: `2`));
1027	Value *A2 = Builder.CreateAnd(LHS: A1, RHS: Builder.getInt32(C: `1`));
1028	Value *Conv16 = Builder.CreateZExt(V: A2, DestTy: IntTy);
1029	Value *Or17 = Builder.CreateOr(LHS: AAddr0, RHS: Conv16);
1030	Value *Inc = Builder.CreateAdd(LHS: Or17, RHS: Builder.getIntN(N: BitWidth, C: `1`));
1031	Value Shr18 = nullptr*;
1032	if (IsSigned)
1033	Shr18 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: `2`));
1034	else
1035	Shr18 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: `2`));
1036	Value *A3 = Builder.CreateAnd(LHS: Inc, RHS: Temp1, Name: "a3");
1037	Value *PosOrNeg = Builder.CreateICmpEQ(LHS: A3, RHS: Builder.getIntN(N: BitWidth, C: `0`));
1038	Value *ExtractT60 = Builder.CreateTrunc(V: Shr18, DestTy: Builder.getIntNTy(N: FloatWidth));
1039	Value *Extract63 = Builder.CreateLShr(LHS: Shr18, RHS: Builder.getIntN(N: BitWidth, C: `32`));
1040	Value ExtractT64 = nullptr*;
1041	if (FloatWidth > `80`)
1042	ExtractT64 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getInt64Ty());
1043	else
1044	ExtractT64 = Builder.CreateTrunc(V: Extract63, DestTy: Builder.getInt32Ty());
1045	// Rounding usually keeps the exponent within its current magnitude and
1046	// overflow is rare. The False path is unlikely to be taken.
1047	Value *CondBrSwEpilog = Builder.CreateCondBr(Cond: PosOrNeg, True: IfEnd26, False: IfThen20);
1048	applyProfMetadataIfEnabled(V: CondBrSwEpilog, setMetadataCallback: [&](Instruction *Inst) {
1049	Inst->setMetadata(
1050	KindID: LLVMContext::MD_prof,
1051	Node: MDBuilder (Inst->getContext()).createLikelyBranchWeights());
1052	});
1053
1054	// if.then20
1055	Builder.SetInsertPoint(IfThen20);
1056	Value Shr21 = nullptr*;
1057	if (IsSigned)
1058	Shr21 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: `3`));
1059	else
1060	Shr21 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: `3`));
1061	Value *ExtractT = Builder.CreateTrunc(V: Shr21, DestTy: Builder.getIntNTy(N: FloatWidth));
1062	Value *Extract = Builder.CreateLShr(LHS: Shr21, RHS: Builder.getIntN(N: BitWidth, C: `32`));
1063	Value ExtractT62 = nullptr*;
1064	if (FloatWidth > `80`)
1065	ExtractT62 = Builder.CreateTrunc(V: Sub1, DestTy: Builder.getInt64Ty());
1066	else
1067	ExtractT62 = Builder.CreateTrunc(V: Extract, DestTy: Builder.getInt32Ty());
1068	Builder.CreateBr(Dest: IfEnd26);
1069
1070	// if.else:
1071	Builder.SetInsertPoint(IfElse);
1072	Value *Sub24 = Builder.CreateAdd(
1073	LHS: FloatWidth == `128` ? Call : Cast,
1074	RHS: ConstantInt::getSigned(Ty: Builder.getIntNTy(N: BitWidthNew),
1075	V: -(int)(BitWidth - FPMantissaWidth - `1`)));
1076	Value *ShProm25 = Builder.CreateZExt(V: Sub24, DestTy: IntTy);
1077	Value *Shl26 = Builder.CreateShl(LHS: IsSigned ? Sub : IntVal,
1078	RHS: FloatWidth == `128` ? Sub24 : ShProm25);
1079	Value *ExtractT61 = Builder.CreateTrunc(V: Shl26, DestTy: Builder.getIntNTy(N: FloatWidth));
1080	Value *Extract65 = Builder.CreateLShr(LHS: Shl26, RHS: Builder.getIntN(N: BitWidth, C: `32`));
1081	Value ExtractT66 = nullptr*;
1082	if (FloatWidth > `80`)
1083	ExtractT66 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getInt64Ty());
1084	else
1085	ExtractT66 = Builder.CreateTrunc(V: Extract65, DestTy: Builder.getInt32Ty());
1086	Builder.CreateBr(Dest: IfEnd26);
1087
1088	// if.end26:
1089	Builder.SetInsertPoint(IfEnd26);
1090	PHINode *AAddr1Off0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth), NumReservedValues: `3`);
1091	AAddr1Off0->addIncoming(V: ExtractT, BB: IfThen20);
1092	AAddr1Off0->addIncoming(V: ExtractT60, BB: SwEpilog);
1093	AAddr1Off0->addIncoming(V: ExtractT61, BB: IfElse);
1094	PHINode AAddr1Off32 = nullptr*;
1095	if (FloatWidth > `32`) {
1096	AAddr1Off32 =
1097	Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth > `80` ? `64` : `32`), NumReservedValues: `3`);
1098	AAddr1Off32->addIncoming(V: ExtractT62, BB: IfThen20);
1099	AAddr1Off32->addIncoming(V: ExtractT64, BB: SwEpilog);
1100	AAddr1Off32->addIncoming(V: ExtractT66, BB: IfElse);
1101	}
1102	PHINode E0 = nullptr*;
1103	if (FloatWidth <= `80`) {
1104	E0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: BitWidthNew), NumReservedValues: `3`);
1105	E0->addIncoming(V: Sub1, BB: IfThen20);
1106	E0->addIncoming(V: Sub2, BB: SwEpilog);
1107	E0->addIncoming(V: Sub2, BB: IfElse);
1108	}
1109	Value And29 = nullptr*;
1110	if (FloatWidth > `80`) {
1111	Value *Temp2 = Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: `1`),
1112	RHS: Builder.getIntN(N: BitWidth, C: `63`));
1113	And29 = Builder.CreateAnd(LHS: Shr, RHS: Temp2, Name: "and29");
1114	} else {
1115	Value *Conv28 = Builder.CreateTrunc(V: Shr, DestTy: Builder.getInt32Ty());
1116	And29 = Builder.CreateAnd(
1117	LHS: Conv28, RHS: ConstantInt::get(Context&: Builder.getContext(), V: APInt::getSignMask(BitWidth: `32`)));
1118	}
1119	unsigned TempMod = FPMantissaWidth % `32`;
1120	Value And34 = nullptr*;
1121	Value Shl30 = nullptr*;
1122	if (FloatWidth > `80`) {
1123	TempMod += `32`;
1124	Value *Add = Builder.CreateShl(LHS: AAddr1Off32, RHS: Builder.getInt64(C: TempMod));
1125	Shl30 = Builder.CreateAdd(
1126	LHS: Add, RHS: Builder.getInt64(C: ((`1ull` << (`62ull` - TempMod)) - `1ull`) << TempMod));
1127	And34 = Builder.CreateZExt(V: Shl30, DestTy: Builder.getInt128Ty());
1128	} else {
1129	Value *Add = Builder.CreateShl(LHS: E0, RHS: Builder.getInt32(C: TempMod));
1130	Shl30 = Builder.CreateAdd(
1131	LHS: Add, RHS: Builder.getInt32(C: ((`1` << (`30` - TempMod)) - `1`) << TempMod));
1132	And34 = Builder.CreateAnd(LHS: FloatWidth > `32` ? AAddr1Off32 : AAddr1Off0,
1133	RHS: Builder.getInt32(C: (`1` << TempMod) - `1`));
1134	}
1135	Value Or35 = nullptr*;
1136	if (FloatWidth > `80`) {
1137	Value *And29Trunc = Builder.CreateTrunc(V: And29, DestTy: Builder.getInt128Ty());
1138	Value *Or31 = Builder.CreateOr(LHS: And29Trunc, RHS: And34);
1139	Value *Or34 = Builder.CreateShl(LHS: Or31, RHS: Builder.getIntN(N: `128`, C: `64`));
1140	Value *Temp3 = Builder.CreateShl(LHS: Builder.getIntN(N: `128`, C: `1`),
1141	RHS: Builder.getIntN(N: `128`, C: FPMantissaWidth));
1142	Value *Temp4 = Builder.CreateSub(LHS: Temp3, RHS: Builder.getIntN(N: `128`, C: `1`));
1143	Value *A6 = Builder.CreateAnd(LHS: AAddr1Off0, RHS: Temp4);
1144	Or35 = Builder.CreateOr(LHS: Or34, RHS: A6);
1145	} else {
1146	Value *Or31 = Builder.CreateOr(LHS: And34, RHS: And29);
1147	Or35 = Builder.CreateOr(LHS: IsSigned ? Or31 : And34, RHS: Shl30);
1148	}
1149	Value A4 = nullptr*;
1150	if (IToFP->getType()->isDoubleTy()) {
1151	Value *ZExt1 = Builder.CreateZExt(V: Or35, DestTy: Builder.getIntNTy(N: FloatWidth));
1152	Value *Shl1 = Builder.CreateShl(LHS: ZExt1, RHS: Builder.getIntN(N: FloatWidth, C: `32`));
1153	Value *And1 =
1154	Builder.CreateAnd(LHS: AAddr1Off0, RHS: Builder.getIntN(N: FloatWidth, C: `0xFFFFFFFF`));
1155	Value *Or1 = Builder.CreateOr(LHS: Shl1, RHS: And1);
1156	A4 = Builder.CreateBitCast(V: Or1, DestTy: IToFP->getType());
1157	} else if (IToFP->getType()->isX86_FP80Ty()) {
1158	Value *A40 =
1159	Builder.CreateBitCast(V: Or35, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
1160	A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
1161	} else if (IToFP->getType()->isHalfTy() \|\| IToFP->getType()->isBFloatTy()) {
1162	// Deal with "half" situation. This is a workaround since we don't have
1163	// floattihf.c currently as referring.
1164	Value *A40 =
1165	Builder.CreateBitCast(V: Or35, DestTy: Type::getFloatTy(C&: Builder.getContext()));
1166	A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
1167	} else // float type
1168	A4 = Builder.CreateBitCast(V: Or35, DestTy: IToFP->getType());
1169
1170	// Sub2 is the unbiased exponent (the index of the top set bit in the input).
1171	// The exponent arithmetic above wraps to garbage instead of inf once it
1172	// overflows the exponent field, so saturate to a correctly-signed infinity
1173	// when Sub2 reaches 1 << (ExponentWidth - 1). Sub2 is at most BitWidth - 1,
1174	// so skip the check entirely when even that can't reach the threshold.
1175	// (Values that round up* into inf, e.g. 2^n - 1, keep Sub2 = BitWidth - 1;*
1176	// these are handled by the conversion's own rounding, not by this
1177	// saturation.)
1178	unsigned ExponentWidth = FloatWidth - FPMantissaWidth - `1`;
1179	uint64_t MinInfExp = `1ULL` << (ExponentWidth - `1`);
1180	if (BitWidth - `1` >= MinInfExp) {
1181	Value *MinInfExpVal = Builder.getIntN(N: BitWidthNew, C: MinInfExp);
1182	Value *Overflow = Builder.CreateICmpUGE(LHS: Sub2, RHS: MinInfExpVal);
1183	Value Inf = ConstantFP::getInfinity(Ty: IToFP->getType(), /Negative=/*false);
1184	if (IsSigned) {
1185	Value *NegInf =
1186	ConstantFP::getInfinity(Ty: IToFP->getType(), /Negative=/true);
1187	Value *IsNeg =
1188	Builder.CreateICmpSLT(LHS: IntVal, RHS: ConstantInt::getNullValue(Ty: IntTy));
1189	Inf = Builder.CreateSelectWithUnknownProfile(C: IsNeg, True: NegInf, False: Inf,
1190	DEBUG_TYPE);
1191	}
1192	A4 = Builder.CreateSelect(C: Overflow, True: Inf, False: A4);
1193	// We consider overflow to be an unlikely case.
1194	applyProfMetadataIfEnabled(V: A4, setMetadataCallback: [&](Instruction *Inst) {
1195	Inst->setMetadata(
1196	KindID: LLVMContext::MD_prof,
1197	Node: MDBuilder (Inst->getContext()).createUnlikelyBranchWeights());
1198	});
1199	}
1200	Builder.CreateBr(Dest: End);
1201
1202	// return:
1203	Builder.SetInsertPoint(TheBB: End, IP: End->begin());
1204	PHINode *Retval0 = Builder.CreatePHI(Ty: IToFP->getType(), NumReservedValues: `2`);
1205	Retval0->addIncoming(V: A4, BB: IfEnd26);
1206	Retval0->addIncoming(V: ConstantFP::getZero(Ty: IToFP->getType(), Negative: false), BB: Entry);
1207
1208	IToFP->replaceAllUsesWith(V: Retval0);
1209	IToFP->dropAllReferences();
1210	IToFP->eraseFromParent();
1211	}
1212
1213	static void scalarize(Instruction *I,
1214	SmallVectorImpl<Instruction *> &Worklist) {
1215	VectorType *VTy = cast<FixedVectorType>(Val: I->getType());
1216
1217	IRBuilder<> Builder(I);
1218
1219	unsigned NumElements = VTy->getElementCount().getFixedValue();
1220	Value *Result = PoisonValue::get(T: VTy);
1221	for (unsigned Idx = `0`; Idx < NumElements; ++Idx) {
1222	Value *Ext = Builder.CreateExtractElement(Vec: I->getOperand(i: `0`), Idx);
1223
1224	Value NewOp = nullptr*;
1225	if (auto *BinOp = dyn_cast<BinaryOperator>(Val: I))
1226	NewOp = Builder.CreateBinOp(
1227	Opc: BinOp->getOpcode(), LHS: Ext,
1228	RHS: Builder.CreateExtractElement(Vec: I->getOperand(i: `1`), Idx));
1229	else if (auto *CastI = dyn_cast<CastInst>(Val: I))
1230	NewOp = Builder.CreateCast(Op: CastI->getOpcode(), V: Ext,
1231	DestTy: I->getType()->getScalarType());
1232	else if (auto *II = dyn_cast<IntrinsicInst>(Val: I)) {
1233	assert(II->getIntrinsicID() == Intrinsic::fptoui_sat \|\|
1234	II->getIntrinsicID() == Intrinsic::fptosi_sat);
1235	NewOp = Builder.CreateIntrinsic(RetTy: I->getType()->getScalarType(),
1236	ID: II->getIntrinsicID(), Args: {Ext});
1237	} else
1238	llvm_unreachable("Unsupported instruction type");
1239
1240	Result = Builder.CreateInsertElement(Vec: Result, NewElt: NewOp, Idx);
1241	if (auto *ScalarizedI = dyn_cast<Instruction>(Val: NewOp)) {
1242	ScalarizedI->copyIRFlags(V: I, IncludeWrapFlags: true);
1243	Worklist.push_back(Elt: ScalarizedI);
1244	}
1245	}
1246
1247	I->replaceAllUsesWith(V: Result);
1248	I->dropAllReferences();
1249	I->eraseFromParent();
1250	}
1251
1252	static void addToWorklist(Instruction &I,
1253	SmallVector<Instruction *, `4`> &Worklist) {
1254	if (I.getOperand(i: `0`)->getType()->isVectorTy())
1255	scalarize(I: &I, Worklist);
1256	else
1257	Worklist.push_back(Elt: &I);
1258	}
1259
1260	static bool runImpl(Function &F, const TargetLowering &TLI,
1261	const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) {
1262	SmallVector<Instruction *, `4`> Worklist;
1263
1264	unsigned MaxLegalFpConvertBitWidth =
1265	TLI.getMaxLargeFPConvertBitWidthSupported();
1266	if (ExpandFpConvertBits != IntegerType::MAX_INT_BITS)
1267	MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
1268
1269	unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
1270	if (ExpandDivRemBits != IntegerType::MAX_INT_BITS)
1271	MaxLegalDivRemBitWidth = ExpandDivRemBits;
1272
1273	bool DisableExpandLargeFp =
1274	MaxLegalFpConvertBitWidth >= IntegerType::MAX_INT_BITS;
1275	bool DisableExpandLargeDivRem =
1276	MaxLegalDivRemBitWidth >= IntegerType::MAX_INT_BITS;
1277	bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1278
1279	if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
1280	return false;
1281
1282	auto ShouldHandleInst = [&](Instruction &I) {
1283	Type *Ty = I.getType();
1284	// TODO: This pass doesn't handle scalable vectors.
1285	if (Ty->isScalableTy())
1286	return false;
1287
1288	switch (I.getOpcode()) {
1289	case Instruction::FRem:
1290	return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1291	case Instruction::FPToUI:
1292	case Instruction::FPToSI:
1293	return !DisableExpandLargeFp &&
1294	cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
1295	MaxLegalFpConvertBitWidth;
1296	case Instruction::UIToFP:
1297	case Instruction::SIToFP:
1298	return !DisableExpandLargeFp &&
1299	cast<IntegerType>(Val: I.getOperand(i: `0`)->getType()->getScalarType())
1300	->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1301	case Instruction::UDiv:
1302	case Instruction::SDiv:
1303	case Instruction::URem:
1304	case Instruction::SRem:
1305	// Power-of-2 divisors are handled inside the expansion (via efficient
1306	// shift/mask sequences) rather than being excluded here, so that
1307	// backends that cannot lower wide div/rem even for powers of two
1308	// (e.g. when DAGCombiner is disabled) still get valid lowered code.
1309	return !DisableExpandLargeDivRem &&
1310	cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
1311	MaxLegalDivRemBitWidth;
1312	case Instruction::Call: {
1313	auto *II = dyn_cast<IntrinsicInst>(Val: &I);
1314	if (II && (II->getIntrinsicID() == Intrinsic::fptoui_sat \|\|
1315	II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
1316	return !DisableExpandLargeFp &&
1317	cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
1318	MaxLegalFpConvertBitWidth;
1319	}
1320	return false;
1321	}
1322	}
1323
1324	return false;
1325	};
1326
1327	bool Modified = false;
1328	for (auto It = inst_begin(F: &F), End = inst_end(F); It != End;) {
1329	Instruction &I = *It ++;
1330	if (!ShouldHandleInst (I))
1331	continue;
1332
1333	addToWorklist(I, Worklist);
1334	Modified = true;
1335	}
1336
1337	while (!Worklist.empty()) {
1338	Instruction *I = Worklist.pop_back_val();
1339
1340	switch (I->getOpcode()) {
1341	case Instruction::FRem: {
1342	auto SQ = [&]() -> std::optional<SimplifyQuery> {
1343	if (AC) {
1344	auto Res = std::make_optional<SimplifyQuery>(
1345	args: I->getModule()->getDataLayout(), args&: I);
1346	Res ->AC = AC;
1347	return Res;
1348	}
1349	return {};
1350	}();
1351
1352	expandFRem(I&: cast<BinaryOperator>(Val&: *I), SQ);
1353	break;
1354	}
1355
1356	case Instruction::FPToUI:
1357	expandFPToI(FPToI: I, /IsSaturating=/false, /IsSigned=/false);
1358	break;
1359	case Instruction::FPToSI:
1360	expandFPToI(FPToI: I, /IsSaturating=/false, /IsSigned=/true);
1361	break;
1362
1363	case Instruction::UIToFP:
1364	case Instruction::SIToFP:
1365	expandIToFP(IToFP: I);
1366	break;
1367
1368	case Instruction::UDiv:
1369	case Instruction::SDiv:
1370	case Instruction::URem:
1371	case Instruction::SRem: {
1372	auto *BO = cast<BinaryOperator>(Val: I);
1373	// TODO: isConstantPowerOfTwo does not handle vector constants, so
1374	// vector div/rem by a power-of-2 splat goes through the generic path.
1375	if (isConstantPowerOfTwo(V: BO->getOperand(i_nocapture: `1`), SignedOp: isSigned(Opcode: BO->getOpcode()))) {
1376	expandPow2DivRem(BO);
1377	} else {
1378	unsigned Opc = BO->getOpcode();
1379	if (Opc == Instruction::UDiv \|\| Opc == Instruction::SDiv)
1380	expandDivision(Div: BO);
1381	else
1382	expandRemainder(Rem: BO);
1383	}
1384	break;
1385	}
1386	case Instruction::Call: {
1387	auto *II = cast<IntrinsicInst>(Val: I);
1388	assert(II->getIntrinsicID() == Intrinsic::fptoui_sat \|\|
1389	II->getIntrinsicID() == Intrinsic::fptosi_sat);
1390	expandFPToI(FPToI: I, /IsSaturating=/true,
1391	/IsSigned=/II->getIntrinsicID() == Intrinsic::fptosi_sat);
1392	break;
1393	}
1394	}
1395	}
1396
1397	return Modified;
1398	}
1399
1400	namespace {
1401	class ExpandIRInstsLegacyPass : public FunctionPass {
1402	CodeGenOptLevel OptLevel;
1403
1404	public:
1405	static char ID;
1406
1407	ExpandIRInstsLegacyPass(CodeGenOptLevel OptLevel)
1408	: FunctionPass (ID), OptLevel(OptLevel) {}
1409
1410	ExpandIRInstsLegacyPass() : ExpandIRInstsLegacyPass (CodeGenOptLevel::None) {}
1411
1412	bool runOnFunction(Function &F) override {
1413	auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1414	const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);
1415	auto *TLI = Subtarget->getTargetLowering();
1416	AssumptionCache AC = nullptr*;
1417
1418	const LibcallLoweringInfo &Libcalls =
1419	getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1420	M: F.getParent(), Subtarget: Subtarget);
1421
1422	if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
1423	AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1424	return runImpl(F, TLI: *TLI, Libcalls, AC);
1425	}
1426
1427	void getAnalysisUsage(AnalysisUsage &AU) const override {
1428	AU.addRequired<LibcallLoweringInfoWrapper>();
1429	AU.addRequired<TargetPassConfig>();
1430	if (OptLevel != CodeGenOptLevel::None)
1431	AU.addRequired<AssumptionCacheTracker>();
1432	AU.addPreserved<AAResultsWrapperPass>();
1433	AU.addPreserved<GlobalsAAWrapperPass>();
1434	AU.addRequired<LibcallLoweringInfoWrapper>();
1435	}
1436	};
1437	} // namespace
1438
1439	ExpandIRInstsPass::ExpandIRInstsPass(const TargetMachine &TM,
1440	CodeGenOptLevel OptLevel)
1441	: TM(&TM), OptLevel(OptLevel) {}
1442
1443	void ExpandIRInstsPass::printPipeline(
1444	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1445	static_cast<PassInfoMixin<ExpandIRInstsPass> >(this*)->printPipeline(
1446	OS, MapClassName2PassName);
1447	OS << `'<'`;
1448	OS << "O" << (int)OptLevel;
1449	OS << `'>'`;
1450	}
1451
1452	PreservedAnalyses ExpandIRInstsPass::run(Function &F,
1453	FunctionAnalysisManager &FAM) {
1454	const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
1455	auto &TLI = *STI->getTargetLowering();
1456	AssumptionCache AC = nullptr*;
1457	if (OptLevel != CodeGenOptLevel::None)
1458	AC = &FAM.getResult<AssumptionAnalysis>(IR&: F);
1459
1460	auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
1461
1462	const ModuleLibcallLoweringInfo *LibcallLowering =
1463	MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(IR&: *F.getParent());
1464
1465	if (!LibcallLowering) {
1466	F.getContext().emitError(ErrorStr: "'" + LibcallLoweringModuleAnalysis::name() +
1467	"' analysis required");
1468	return PreservedAnalyses::all();
1469	}
1470
1471	const LibcallLoweringInfo &Libcalls =
1472	LibcallLowering->getLibcallLowering(Subtarget: *STI);
1473
1474	return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none()
1475	: PreservedAnalyses::all();
1476	}
1477
1478	char ExpandIRInstsLegacyPass::ID = `0`;
1479	INITIALIZE_PASS_BEGIN(ExpandIRInstsLegacyPass, "expand-ir-insts",
1480	"Expand certain fp instructions", false, false)
1481	INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
1482	INITIALIZE_PASS_END(ExpandIRInstsLegacyPass, "expand-ir-insts",
1483	"Expand IR instructions", false, false)
1484
1485	FunctionPass *llvm::createExpandIRInstsPass(CodeGenOptLevel OptLevel) {
1486	return new ExpandIRInstsLegacyPass (OptLevel);
1487	}
1488

Browse the source code of llvm_projects/llvm/lib/CodeGen/ExpandIRInsts.cpp