1//===--- ExpandIRInsts.cpp - Expand IR instructions -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass expands certain instructions at the IR level.
9//
10// The following expansions are implemented:
11// - Expansion of ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp
12// .. to’ instructions with a bitwidth above a threshold. This is
//   useful for targets like x86_64 that cannot lower fp conversions
14// with more than 128 bits.
15//
16// - Expansion of ‘frem‘ for types MVT::f16, MVT::f32, and MVT::f64 for
17// targets which use "Expand" as the legalization action for the
18// corresponding type.
19//
20// - Expansion of ‘udiv‘, ‘sdiv‘, ‘urem‘, and ‘srem‘ instructions with
21// a bitwidth above a threshold into a call to auto-generated
22// functions. This is useful for targets like x86_64 that cannot
23// lower divisions with more than 128 bits or targets like x86_32 that
24// cannot lower divisions with more than 64 bits.
25//
26// Instructions with vector types are scalarized first if their scalar
27// types can be expanded. Scalable vector types are not supported.
28//===----------------------------------------------------------------------===//
29
30#include "llvm/CodeGen/ExpandIRInsts.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/Analysis/AssumptionCache.h"
33#include "llvm/Analysis/GlobalsModRef.h"
34#include "llvm/Analysis/SimplifyQuery.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/CodeGen/ISDOpcodes.h"
37#include "llvm/CodeGen/Passes.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/TargetPassConfig.h"
40#include "llvm/CodeGen/TargetSubtargetInfo.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstIterator.h"
43#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Instructions.h"
45#include "llvm/IR/IntrinsicInst.h"
46#include "llvm/IR/MDBuilder.h"
47#include "llvm/IR/Module.h"
48#include "llvm/IR/PassManager.h"
49#include "llvm/IR/ProfDataUtils.h"
50#include "llvm/InitializePasses.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Target/TargetMachine.h"
56#include "llvm/Transforms/Utils/BasicBlockUtils.h"
57#include "llvm/Transforms/Utils/IntegerDivision.h"
58#include <optional>
59
60#define DEBUG_TYPE "expand-ir-insts"
61
62using namespace llvm;
63
64namespace llvm {
65extern cl::opt<bool> ProfcheckDisableMetadataFixes;
66}
67
68static cl::opt<unsigned>
69 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
70 cl::init(Val: IntegerType::MAX_INT_BITS),
71 cl::desc("fp convert instructions on integers with "
72 "more than <N> bits are expanded."));
73
74static cl::opt<unsigned>
75 ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
76 cl::init(Val: IntegerType::MAX_INT_BITS),
77 cl::desc("div and rem instructions on integers with "
78 "more than <N> bits are expanded."));
79
80static bool isConstantPowerOfTwo(Value *V, bool SignedOp) {
81 auto *C = dyn_cast<ConstantInt>(Val: V);
82 if (!C)
83 return false;
84
85 APInt Val = C->getValue();
86 if (SignedOp && Val.isNegative())
87 Val = -Val;
88 return Val.isPowerOf2();
89}
90
91static bool isSigned(unsigned Opcode) {
92 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
93}
94
95/// For signed div/rem by a power of 2, compute the bias-adjusted dividend:
96/// Sign = ashr X, (BitWidth - 1) -- 0 or -1
97/// Bias = lshr Sign, (BitWidth - ShiftAmt) -- 0 or 2^ShiftAmt - 1
98/// Adjusted = add X, Bias
99/// The bias adds (2^ShiftAmt - 1) for negative X, correcting rounding towards
100/// zero (instead of towards -inf that a plain ashr would give).
101/// The lshr form is used instead of 'and' to avoid large immediate constants.
102static Value *addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth,
103 unsigned ShiftAmt) {
104 assert(ShiftAmt > 0 && ShiftAmt < BitWidth &&
105 "ShiftAmt out of range; callers should handle ShiftAmt == 0");
106 Value *Sign = Builder.CreateAShr(LHS: X, RHS: BitWidth - 1, Name: "sign");
107 Value *Bias = Builder.CreateLShr(LHS: Sign, RHS: BitWidth - ShiftAmt, Name: "bias");
108 return Builder.CreateAdd(LHS: X, RHS: Bias, Name: "adjusted");
109}
110
111/// Expand division or remainder by a power-of-2 constant.
112/// Division (let C = log2(|divisor|)):
113/// udiv X, 2^C -> lshr X, C
114/// sdiv X, 2^C -> ashr (add X, Bias), C (Bias corrects rounding)
115/// sdiv exact X, 2^C -> ashr exact X, C (no bias needed)
116/// For negative power-of-2 divisors, the division result is negated.
117/// Remainder (let C = log2(|divisor|)):
118/// urem X, 2^C -> and X, (2^C - 1)
119/// srem X, 2^C -> sub X, (shl (ashr (add X, Bias), C), C)
120static void expandPow2DivRem(BinaryOperator *BO) {
121 LLVM_DEBUG(dbgs() << "Expanding instruction: " << *BO << '\n');
122
123 unsigned Opcode = BO->getOpcode();
124 bool IsDiv = (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv);
125 bool IsSigned = isSigned(Opcode);
126 // isExact() is only valid for div.
127 bool IsExact = IsDiv && BO->isExact();
128
129 assert(isConstantPowerOfTwo(BO->getOperand(1), IsSigned) &&
130 "Expected power-of-2 constant divisor");
131
132 Value *X = BO->getOperand(i_nocapture: 0);
133 auto *C = cast<ConstantInt>(Val: BO->getOperand(i_nocapture: 1));
134 Type *Ty = BO->getType();
135 unsigned BitWidth = Ty->getIntegerBitWidth();
136
137 APInt DivisorVal = C->getValue();
138 bool IsNegativeDivisor = IsSigned && DivisorVal.isNegative();
139 // Use countr_zero() to get the shift amount directly from the bit pattern.
140 // This works correctly for both positive and negative powers of 2, including
141 // INT_MIN, without needing to negate the value first.
142 unsigned ShiftAmt = DivisorVal.countr_zero();
143
144 IRBuilder<> Builder(BO);
145 Value *Result;
146
147 if (ShiftAmt == 0) {
148 // Div by 1/-1: X / 1 = X, X / -1 = -X.
149 // Rem by 1/-1: always 0.
150 if (IsDiv)
151 Result = IsNegativeDivisor ? Builder.CreateNeg(V: X) : X;
152 else
153 Result = ConstantInt::get(Ty, V: 0);
154 } else if (IsSigned) {
155 // The signed expansion uses X multiple times (bias computation, shift,
156 // and sub for remainder). Freeze X to ensure consistent behavior if it is
157 // undef/poison. For exact division, no bias is needed and X is used only
158 // once, so freeze is unnecessary.
159 if (!IsExact && !isGuaranteedNotToBeUndefOrPoison(V: X))
160 X = Builder.CreateFreeze(V: X, Name: X->getName() + ".fr");
161 // For exact division, no bias is needed since there's no rounding.
162 Value *Dividend =
163 IsExact ? X : addSignedBias(Builder, X, BitWidth, ShiftAmt);
164 Value *Quotient = Builder.CreateAShr(
165 LHS: Dividend, RHS: ShiftAmt, Name: IsDiv && IsNegativeDivisor ? "pre.neg" : "shifted",
166 isExact: IsExact);
167 if (IsDiv) {
168 Result = IsNegativeDivisor ? Builder.CreateNeg(V: Quotient) : Quotient;
169 } else {
170 // Rem = X - (Quotient << ShiftAmt):
171 // clear lower ShiftAmt bits via round-trip shift, then subtract.
172 Value *Truncated = Builder.CreateShl(LHS: Quotient, RHS: ShiftAmt, Name: "truncated");
173 Result = Builder.CreateSub(LHS: X, RHS: Truncated);
174 }
175 } else {
176 if (IsDiv) {
177 Result = Builder.CreateLShr(LHS: X, RHS: ShiftAmt, Name: "", isExact: IsExact);
178 } else {
179 APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShiftAmt);
180 Result = Builder.CreateAnd(LHS: X, RHS: ConstantInt::get(Ty, V: Mask));
181 }
182 }
183
184 BO->replaceAllUsesWith(V: Result);
185 if (Result != X)
186 if (auto *RI = dyn_cast<Instruction>(Val: Result))
187 RI->takeName(V: BO);
188 BO->dropAllReferences();
189 BO->eraseFromParent();
190}
191
192/// This class implements a precise expansion of the frem instruction.
193/// The generated code is based on the fmod implementation in the AMD device
194/// libs.
195namespace {
196class FRemExpander {
197 /// The IRBuilder to use for the expansion.
198 IRBuilder<> &B;
199
200 /// Floating point type of the return value and the arguments of the FRem
201 /// instructions that should be expanded.
202 Type *FremTy;
203
204 /// Floating point type to use for the computation. This may be
205 /// wider than the \p FremTy.
206 Type *ComputeFpTy;
207
208 /// Integer type used to hold the exponents returned by frexp.
209 Type *ExTy;
210
211 /// How many bits of the quotient to compute per iteration of the
212 /// algorithm, stored as a value of type \p ExTy.
213 Value *Bits;
214
215 /// Constant 1 of type \p ExTy.
216 Value *One;
217
218 /// The frem argument/return types that can be expanded by this class.
219 // TODO: The expansion could work for other floating point types
220 // as well, but this would require additional testing.
221 static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
222 MVT::f64};
223
224public:
225 static bool canExpandType(Type *Ty) {
226 EVT VT = EVT::getEVT(Ty);
227 assert(VT.isSimple() && "Can expand only simple types");
228
229 return is_contained(Range: ExpandableTypes, Element: VT.getSimpleVT());
230 }
231
232 static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
233 assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
234 return TLI.getOperationAction(Op: ISD::FREM, VT) ==
235 TargetLowering::LegalizeAction::Expand;
236 }
237
238 static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
239 // Consider scalar type for simplicity. It seems unlikely that a
240 // vector type can be legalized without expansion if the scalar
241 // type cannot.
242 return shouldExpandFremType(TLI, VT: EVT::getEVT(Ty: Ty->getScalarType()));
243 }
244
245 /// Return true if the pass should expand frem instructions of any type
246 /// for the target represented by \p TLI.
247 static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
248 return any_of(Range: ExpandableTypes,
249 P: [&](MVT V) { return shouldExpandFremType(TLI, VT: EVT(V)); });
250 }
251
252 static FRemExpander create(IRBuilder<> &B, Type *Ty) {
253 assert(canExpandType(Ty) && "Expected supported floating point type");
254
255 // The type to use for the computation of the remainder. This may be
256 // wider than the input/result type which affects the ...
257 Type *ComputeTy = Ty;
258 // ... maximum number of iterations of the remainder computation loop
259 // to use. This value is for the case in which the computation
260 // uses the same input/result type.
261 unsigned MaxIter = 2;
262
263 if (Ty->isHalfTy()) {
264 // Use the wider type and less iterations.
265 ComputeTy = B.getFloatTy();
266 MaxIter = 1;
267 }
268
269 unsigned Precision = APFloat::semanticsPrecision(Ty->getFltSemantics());
270 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
271 }
272
273 /// Build the FRem expansion for the numerator \p X and the
274 /// denumerator \p Y. The type of X and Y must match \p FremTy. The
275 /// code will be generated at the insertion point of \p B and the
276 /// insertion point will be reset at exit.
277 Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;
278
279 /// Build an approximate FRem expansion for the numerator \p X and
280 /// the denumerator \p Y at the insertion point of builder \p B.
281 /// The type of X and Y must match \p FremTy.
282 Value *buildApproxFRem(Value *X, Value *Y) const;
283
284private:
285 FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
286 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
287 Bits(ConstantInt::get(Ty: ExTy, V: Bits)), One(ConstantInt::get(Ty: ExTy, V: 1)) {}
288
289 Value *createRcp(Value *V, const Twine &Name) const {
290 // Leave it to later optimizations to turn this into an rcp
291 // instruction if available.
292 return B.CreateFDiv(L: ConstantFP::get(Ty: ComputeFpTy, V: 1.0), R: V, Name);
293 }
294
295 // Helper function to build the UPDATE_AX code which is common to the
296 // loop body and the "final iteration".
297 Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
298 // Build:
299 // float q = rint(ax * ayinv);
300 // ax = fma(-q, ay, ax);
301 // int clt = ax < 0.0f;
302 // float axp = ax + ay;
303 // ax = clt ? axp : ax;
304 Value *Q = B.CreateUnaryIntrinsic(ID: Intrinsic::rint, Op: B.CreateFMul(L: Ax, R: Ayinv),
305 FMFSource: {}, Name: "q");
306 Value *AxUpdate = B.CreateFMA(Factor1: B.CreateFNeg(V: Q), Factor2: Ay, Summand: Ax, FMFSource: {}, Name: "ax");
307 Value *Clt = B.CreateFCmp(P: CmpInst::FCMP_OLT, LHS: AxUpdate,
308 RHS: ConstantFP::getZero(Ty: ComputeFpTy), Name: "clt");
309 Value *Axp = B.CreateFAdd(L: AxUpdate, R: Ay, Name: "axp");
310 return B.CreateSelect(C: Clt, True: Axp, False: AxUpdate, Name: "ax");
311 }
312
313 /// Build code to extract the exponent and mantissa of \p Src.
314 /// Return the exponent minus one for use as a loop bound and
315 /// the mantissa taken to the given \p NewExp power.
316 std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
317 const Twine &ExName,
318 const Twine &PowName) const {
319 // Build:
320 // ExName = frexp_exp(Src) - 1;
321 // PowName = fldexp(frexp_mant(ExName), NewExp);
322 Type *Ty = Src->getType();
323 Type *ExTy = B.getInt32Ty();
324 Value *Frexp = B.CreateIntrinsic(ID: Intrinsic::frexp, OverloadTypes: {Ty, ExTy}, Args: Src);
325 Value *Mant = B.CreateExtractValue(Agg: Frexp, Idxs: {0});
326 Value *Exp = B.CreateExtractValue(Agg: Frexp, Idxs: {1});
327
328 Exp = B.CreateSub(LHS: Exp, RHS: One, Name: ExName);
329 Value *Pow = B.CreateLdexp(Src: Mant, Exp: NewExp, FMFSource: {}, Name: PowName);
330
331 return {Pow, Exp};
332 }
333
334 /// Build the main computation of the remainder for the case in which
335 /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the
336 /// denumerator. Add the incoming edge from the computation result
337 /// to \p RetPhi.
338 void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
339 PHINode *RetPhi, FastMathFlags FMF) const {
340 IRBuilder<>::FastMathFlagGuard Guard(B);
341 B.setFastMathFlags(FMF);
342
343 // Build:
344 // ex = frexp_exp(ax) - 1;
345 // ax = fldexp(frexp_mant(ax), bits);
346 // ey = frexp_exp(ay) - 1;
347 // ay = fledxp(frexp_mant(ay), 1);
348 auto [Ax, Ex] = buildExpAndPower(Src: AxInitial, NewExp: Bits, ExName: "ex", PowName: "ax");
349 auto [Ay, Ey] = buildExpAndPower(Src: AyInitial, NewExp: One, ExName: "ey", PowName: "ay");
350
351 // Build:
352 // int nb = ex - ey;
353 // float ayinv = 1.0/ay;
354 Value *Nb = B.CreateSub(LHS: Ex, RHS: Ey, Name: "nb");
355 Value *Ayinv = createRcp(V: Ay, Name: "ayinv");
356
357 // Build: while (nb > bits)
358 BasicBlock *PreheaderBB = B.GetInsertBlock();
359 Function *Fun = PreheaderBB->getParent();
360 auto *LoopBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.loop_body", Parent: Fun);
361 auto *ExitBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.loop_exit", Parent: Fun);
362
363 B.CreateCondBr(Cond: B.CreateICmp(P: CmpInst::ICMP_SGT, LHS: Nb, RHS: Bits), True: LoopBB, False: ExitBB);
364
365 // Build loop body:
366 // UPDATE_AX
367 // ax = fldexp(ax, bits);
368 // nb -= bits;
369 // One iteration of the loop is factored out. The code shared by
370 // the loop and this "iteration" is denoted by UPDATE_AX.
371 B.SetInsertPoint(LoopBB);
372 PHINode *NbIv = B.CreatePHI(Ty: Nb->getType(), NumReservedValues: 2, Name: "nb_iv");
373 NbIv->addIncoming(V: Nb, BB: PreheaderBB);
374
375 auto *AxPhi = B.CreatePHI(Ty: ComputeFpTy, NumReservedValues: 2, Name: "ax_loop_phi");
376 AxPhi->addIncoming(V: Ax, BB: PreheaderBB);
377
378 Value *AxPhiUpdate = buildUpdateAx(Ax: AxPhi, Ay, Ayinv);
379 AxPhiUpdate = B.CreateLdexp(Src: AxPhiUpdate, Exp: Bits, FMFSource: {}, Name: "ax_update");
380 AxPhi->addIncoming(V: AxPhiUpdate, BB: LoopBB);
381 NbIv->addIncoming(V: B.CreateSub(LHS: NbIv, RHS: Bits, Name: "nb_update"), BB: LoopBB);
382
383 B.CreateCondBr(Cond: B.CreateICmp(P: CmpInst::ICMP_SGT, LHS: NbIv, RHS: Bits), True: LoopBB, False: ExitBB);
384
385 // Build final iteration
386 // ax = fldexp(ax, nb - bits + 1);
387 // UPDATE_AX
388 B.SetInsertPoint(ExitBB);
389
390 auto *AxPhiExit = B.CreatePHI(Ty: ComputeFpTy, NumReservedValues: 2, Name: "ax_exit_phi");
391 AxPhiExit->addIncoming(V: Ax, BB: PreheaderBB);
392 AxPhiExit->addIncoming(V: AxPhi, BB: LoopBB);
393 auto *NbExitPhi = B.CreatePHI(Ty: Nb->getType(), NumReservedValues: 2, Name: "nb_exit_phi");
394 NbExitPhi->addIncoming(V: NbIv, BB: LoopBB);
395 NbExitPhi->addIncoming(V: Nb, BB: PreheaderBB);
396
397 Value *AxFinal = B.CreateLdexp(
398 Src: AxPhiExit, Exp: B.CreateAdd(LHS: B.CreateSub(LHS: NbExitPhi, RHS: Bits), RHS: One), FMFSource: {}, Name: "ax");
399 AxFinal = buildUpdateAx(Ax: AxFinal, Ay, Ayinv);
400
401 // Build:
402 // ax = fldexp(ax, ey);
403 // ret = copysign(ax,x);
404 AxFinal = B.CreateLdexp(Src: AxFinal, Exp: Ey, FMFSource: {}, Name: "ax");
405 if (ComputeFpTy != FremTy)
406 AxFinal = B.CreateFPTrunc(V: AxFinal, DestTy: FremTy);
407 Value *Ret = B.CreateCopySign(LHS: AxFinal, RHS: X);
408
409 RetPhi->addIncoming(V: Ret, BB: ExitBB);
410 }
411
412 /// Build the else-branch of the conditional in the FRem
413 /// expansion, i.e. the case in wich Ax <= Ay, where Ax = |X|, Ay
414 /// = |Y|, and X is the numerator and Y the denumerator. Add the
415 /// incoming edge from the result to \p RetPhi.
416 void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
417 // Build:
418 // ret = ax == ay ? copysign(0.0f, x) : x;
419 Value *ZeroWithXSign = B.CreateCopySign(LHS: ConstantFP::getZero(Ty: FremTy), RHS: X);
420 Value *Ret = B.CreateSelect(C: B.CreateFCmpOEQ(LHS: Ax, RHS: Ay), True: ZeroWithXSign, False: X);
421
422 RetPhi->addIncoming(V: Ret, BB: B.GetInsertBlock());
423 }
424
425 /// Return a value that is NaN if one of the corner cases concerning
426 /// the inputs \p X and \p Y is detected, and \p Ret otherwise.
427 Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,
428 std::optional<SimplifyQuery> &SQ,
429 bool NoInfs) const {
430 // Build:
431 // ret = (y == 0.0f || isnan(y)) ? QNAN : ret;
432 // ret = isfinite(x) ? ret : QNAN;
433 Value *Nan = ConstantFP::getQNaN(Ty: FremTy);
434 Ret = B.CreateSelect(C: B.CreateFCmpUEQ(LHS: Y, RHS: ConstantFP::getZero(Ty: FremTy)), True: Nan,
435 False: Ret);
436 Value *XFinite =
437 NoInfs || (SQ && isKnownNeverInfinity(V: X, SQ: *SQ))
438 ? B.getTrue()
439 : B.CreateFCmpULT(LHS: B.CreateFAbs(V: X), RHS: ConstantFP::getInfinity(Ty: FremTy));
440 Ret = B.CreateSelect(C: XFinite, True: Ret, False: Nan);
441
442 return Ret;
443 }
444};
445} // namespace
446
447Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {
448 IRBuilder<>::FastMathFlagGuard Guard(B);
449 // Propagating the approximate functions flag to the
450 // division leads to an unacceptable drop in precision
451 // on AMDGPU.
452 // TODO Find out if any flags might be worth propagating.
453 B.clearFastMathFlags();
454
455 Value *Quot = B.CreateFDiv(L: X, R: Y);
456 Value *Trunc = B.CreateUnaryIntrinsic(ID: Intrinsic::trunc, Op: Quot, FMFSource: {});
457 Value *Neg = B.CreateFNeg(V: Trunc);
458
459 return B.CreateFMA(Factor1: Neg, Factor2: Y, Summand: X);
460}
461
462Value *FRemExpander::buildFRem(Value *X, Value *Y,
463 std::optional<SimplifyQuery> &SQ) const {
464 assert(X->getType() == FremTy && Y->getType() == FremTy);
465
466 FastMathFlags FMF = B.getFastMathFlags();
467
468 // This function generates the following code structure:
469 // if (abs(x) > abs(y))
470 // { ret = compute remainder }
471 // else
472 // { ret = x or 0 with sign of x }
473 // Adjust ret to NaN/inf in input
474 // return ret
475 Value *Ax = B.CreateFAbs(V: X, FMFSource: {}, Name: "ax");
476 Value *Ay = B.CreateFAbs(V: Y, FMFSource: {}, Name: "ay");
477 if (ComputeFpTy != X->getType()) {
478 Ax = B.CreateFPExt(V: Ax, DestTy: ComputeFpTy, Name: "ax");
479 Ay = B.CreateFPExt(V: Ay, DestTy: ComputeFpTy, Name: "ay");
480 }
481 Value *AxAyCmp = B.CreateFCmpOGT(LHS: Ax, RHS: Ay);
482
483 PHINode *RetPhi = B.CreatePHI(Ty: FremTy, NumReservedValues: 2, Name: "ret");
484 Value *Ret = RetPhi;
485
486 // We would return NaN in all corner cases handled here.
487 // Hence, if NaNs are excluded, keep the result as it is.
488 if (!FMF.noNaNs())
489 Ret = handleInputCornerCases(Ret, X, Y, SQ, NoInfs: FMF.noInfs());
490
491 Function *Fun = B.GetInsertBlock()->getParent();
492 auto *ThenBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.compute", Parent: Fun);
493 auto *ElseBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.else", Parent: Fun);
494 SplitBlockAndInsertIfThenElse(Cond: AxAyCmp, SplitBefore: RetPhi, ThenBlock: &ThenBB, ElseBlock: &ElseBB);
495
496 auto SavedInsertPt = B.GetInsertPoint();
497
498 // Build remainder computation for "then" branch
499 //
500 // The ordered comparison ensures that ax and ay are not NaNs
501 // in the then-branch. Furthermore, y cannot be an infinity and the
502 // check at the end of the function ensures that the result will not
503 // be used if x is an infinity.
504 FastMathFlags ComputeFMF = FMF;
505 ComputeFMF.setNoInfs();
506 ComputeFMF.setNoNaNs();
507
508 B.SetInsertPoint(ThenBB);
509 buildRemainderComputation(AxInitial: Ax, AyInitial: Ay, X, RetPhi, FMF);
510 B.CreateBr(Dest: RetPhi->getParent());
511
512 // Build "else"-branch
513 B.SetInsertPoint(ElseBB);
514 buildElseBranch(Ax, Ay, X, RetPhi);
515 B.CreateBr(Dest: RetPhi->getParent());
516
517 B.SetInsertPoint(SavedInsertPt);
518
519 return Ret;
520}
521
522static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
523 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
524
525 Type *Ty = I.getType();
526 assert(FRemExpander::canExpandType(Ty) &&
527 "Expected supported floating point type");
528
529 FastMathFlags FMF = I.getFastMathFlags();
530 // TODO Make use of those flags for optimization?
531 FMF.setAllowReciprocal(false);
532 FMF.setAllowContract(false);
533
534 IRBuilder<> B(&I);
535 B.setFastMathFlags(FMF);
536 B.SetCurrentDebugLocation(I.getDebugLoc());
537
538 const FRemExpander Expander = FRemExpander::create(B, Ty);
539 Value *Ret = FMF.approxFunc()
540 ? Expander.buildApproxFRem(X: I.getOperand(i_nocapture: 0), Y: I.getOperand(i_nocapture: 1))
541 : Expander.buildFRem(X: I.getOperand(i_nocapture: 0), Y: I.getOperand(i_nocapture: 1), SQ);
542
543 I.replaceAllUsesWith(V: Ret);
544 Ret->takeName(V: &I);
545 I.eraseFromParent();
546
547 return true;
548}
549// clang-format off: preserve formatting of the following example
550
551/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
552/// the generated code. This currently generates code similarly to compiler-rt's
553/// implementations.
554///
555/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
556/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
557/// entry:
558/// %0 = bitcast float %a to i32
559/// %conv.i = zext i32 %0 to i64
560/// %tobool.not = icmp sgt i32 %0, -1
561/// %conv = select i1 %tobool.not, i64 1, i64 -1
562/// %and = lshr i64 %conv.i, 23
563/// %shr = and i64 %and, 255
564/// %and2 = and i64 %conv.i, 8388607
565/// %or = or i64 %and2, 8388608
566/// %cmp = icmp ult i64 %shr, 127
567/// br i1 %cmp, label %cleanup, label %if.end
568///
569/// if.end: ; preds = %entry
570/// %sub = add nuw nsw i64 %shr, 4294967169
571/// %conv5 = and i64 %sub, 4294967232
572/// %cmp6.not = icmp eq i64 %conv5, 0
573/// br i1 %cmp6.not, label %if.end12, label %if.then8
574///
575/// if.then8: ; preds = %if.end
/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
/// br label %cleanup
578///
579/// if.end12: ; preds = %if.end
580/// %cmp13 = icmp ult i64 %shr, 150
581/// br i1 %cmp13, label %if.then15, label %if.else
582///
583/// if.then15: ; preds = %if.end12
584/// %sub16 = sub nuw nsw i64 150, %shr
585/// %shr17 = lshr i64 %or, %sub16
586/// %mul = mul nsw i64 %shr17, %conv
587/// br label %cleanup
588///
589/// if.else: ; preds = %if.end12
590/// %sub18 = add nsw i64 %shr, -150
591/// %shl = shl i64 %or, %sub18
592/// %mul19 = mul nsw i64 %shl, %conv
593/// br label %cleanup
594///
/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
/// ret i64 %retval.0
599/// }
600///
601/// Replace fp to integer with generated code.
602static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned) {
603 // clang-format on
604 IRBuilder<> Builder(FPToI);
605 auto *FloatVal = FPToI->getOperand(i: 0);
606 IntegerType *IntTy = cast<IntegerType>(Val: FPToI->getType());
607
608 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
609 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
610
611 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
612 // to i32 first following a sext/zext to target integer type.
613 Value *A1 = nullptr;
614 if (FloatVal->getType()->isHalfTy() && BitWidth >= 32) {
615 if (FPToI->getOpcode() == Instruction::FPToUI) {
616 Value *A0 = Builder.CreateFPToUI(V: FloatVal, DestTy: Builder.getInt32Ty());
617 A1 = Builder.CreateZExt(V: A0, DestTy: IntTy);
618 } else { // FPToSI
619 Value *A0 = Builder.CreateFPToSI(V: FloatVal, DestTy: Builder.getInt32Ty());
620 A1 = Builder.CreateSExt(V: A0, DestTy: IntTy);
621 }
622 FPToI->replaceAllUsesWith(V: A1);
623 FPToI->dropAllReferences();
624 FPToI->eraseFromParent();
625 return;
626 }
627
628 // fp80 conversion is implemented by fpext to fp128 first then do the
629 // conversion.
630 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
631 unsigned FloatWidth =
632 PowerOf2Ceil(A: FloatVal->getType()->getScalarSizeInBits());
633 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
634 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
635 IntegerType *FloatIntTy = Builder.getIntNTy(N: FloatWidth);
636 Value *ImplicitBit = ConstantInt::get(
637 Ty: FloatIntTy, V: APInt::getOneBitSet(numBits: FloatWidth, BitNo: FPMantissaWidth));
638 Value *SignificandMask = ConstantInt::get(
639 Ty: FloatIntTy, V: APInt::getLowBitsSet(numBits: FloatWidth, loBitsSet: FPMantissaWidth));
640
641 BasicBlock *Entry = Builder.GetInsertBlock();
642 Function *F = Entry->getParent();
643 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
644 BasicBlock *CheckSaturateBB, *SaturateBB;
645 BasicBlock *End =
646 Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "fp-to-i-cleanup");
647 if (IsSaturating) {
648 CheckSaturateBB = BasicBlock::Create(Context&: Builder.getContext(),
649 Name: "fp-to-i-if-check.saturate", Parent: F, InsertBefore: End);
650 SaturateBB =
651 BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-saturate", Parent: F, InsertBefore: End);
652 }
653 BasicBlock *CheckExpSizeBB = BasicBlock::Create(
654 Context&: Builder.getContext(), Name: "fp-to-i-if-check.exp.size", Parent: F, InsertBefore: End);
655 BasicBlock *ExpSmallBB =
656 BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-exp.small", Parent: F, InsertBefore: End);
657 BasicBlock *ExpLargeBB =
658 BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-exp.large", Parent: F, InsertBefore: End);
659
660 Entry->getTerminator()->eraseFromParent();
661
662 // entry:
663 Builder.SetInsertPoint(Entry);
664 // We're going to introduce branches on the value, so freeze it.
665 if (!isGuaranteedNotToBeUndefOrPoison(V: FloatVal))
666 FloatVal = Builder.CreateFreeze(V: FloatVal);
667 // fp80 conversion is implemented by fpext to fp128 first then do the
668 // conversion.
669 if (FloatVal->getType()->isX86_FP80Ty())
670 FloatVal =
671 Builder.CreateFPExt(V: FloatVal, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
672 Value *ARep = Builder.CreateBitCast(V: FloatVal, DestTy: FloatIntTy);
673 Value *PosOrNeg, *Sign;
674 if (IsSigned) {
675 PosOrNeg =
676 Builder.CreateICmpSGT(LHS: ARep, RHS: ConstantInt::getSigned(Ty: FloatIntTy, V: -1));
677 Sign = Builder.CreateSelectWithUnknownProfile(
678 C: PosOrNeg, True: ConstantInt::getSigned(Ty: IntTy, V: 1),
679 False: ConstantInt::getSigned(Ty: IntTy, V: -1), PassName: "sign");
680 }
681 Value *And =
682 Builder.CreateLShr(LHS: ARep, RHS: Builder.getIntN(N: FloatWidth, C: FPMantissaWidth));
683 Value *BiasedExp = Builder.CreateAnd(
684 LHS: And, RHS: Builder.getIntN(N: FloatWidth, C: (1 << ExponentWidth) - 1), Name: "biased.exp");
685 Value *Abs = Builder.CreateAnd(LHS: ARep, RHS: SignificandMask);
686 Value *Significand = Builder.CreateOr(LHS: Abs, RHS: ImplicitBit, Name: "significand");
687 Value *ZeroResultCond = Builder.CreateICmpULT(
688 LHS: BiasedExp, RHS: Builder.getIntN(N: FloatWidth, C: ExponentBias), Name: "exp.is.negative");
689 if (IsSaturating) {
690 Value *IsNaN = Builder.CreateFCmpUNO(LHS: FloatVal, RHS: FloatVal, Name: "is.nan");
691 ZeroResultCond = Builder.CreateOr(LHS: ZeroResultCond, RHS: IsNaN);
692 if (!IsSigned) {
693 Value *IsNeg = Builder.CreateIsNeg(Arg: ARep);
694 ZeroResultCond = Builder.CreateOr(LHS: ZeroResultCond, RHS: IsNeg);
695 }
696 }
697 Instruction *CondBr = Builder.CreateCondBr(
698 Cond: ZeroResultCond, True: End, False: IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
699 // We do not have any information on the value of the exponent, so mark the
700 // branch weights as unkown.
701 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *CondBr, DEBUG_TYPE, F);
702
703 Value *Saturated;
704 if (IsSaturating) {
705 // check.saturate:
706 Builder.SetInsertPoint(CheckSaturateBB);
707 uint64_t SaturatingBiasedExp =
708 static_cast<uint64_t>(ExponentBias) + BitWidth - IsSigned;
709 // Clamp to the all-ones (inf/NaN) exponent. Without this, when the integer
710 // is wide enough to hold every finite float the threshold exceeds any
711 // possible biased exponent, so +/-inf would never saturate.
712 uint64_t MaxBiasedExp = (1ULL << ExponentWidth) - 1;
713 if (SaturatingBiasedExp > MaxBiasedExp)
714 SaturatingBiasedExp = MaxBiasedExp;
715 Value *Cmp3 = Builder.CreateICmpUGE(
716 LHS: BiasedExp, RHS: ConstantInt::get(Ty: FloatIntTy, V: SaturatingBiasedExp));
717 Value *CondBrSat = Builder.CreateCondBr(Cond: Cmp3, True: SaturateBB, False: CheckExpSizeBB);
718 // Saturation is considered an unlikely event.
719 applyProfMetadataIfEnabled(V: CondBrSat, setMetadataCallback: [&](Instruction *Inst) {
720 Inst->setMetadata(
721 KindID: LLVMContext::MD_prof,
722 Node: MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
723 });
724
725 // saturate:
726 Builder.SetInsertPoint(SaturateBB);
727 if (IsSigned) {
728 Value *SignedMax =
729 ConstantInt::get(Ty: IntTy, V: APInt::getSignedMaxValue(numBits: BitWidth));
730 Value *SignedMin =
731 ConstantInt::get(Ty: IntTy, V: APInt::getSignedMinValue(numBits: BitWidth));
732 // Select between the signed max and min values for saturation.
733 Saturated = Builder.CreateSelectWithUnknownProfile(
734 C: PosOrNeg, True: SignedMax, False: SignedMin, PassName: "saturated");
735 } else {
736 Saturated = ConstantInt::getAllOnesValue(Ty: IntTy);
737 }
738 Builder.CreateBr(Dest: End);
739 }
740
741 // if.end9:
742 Builder.SetInsertPoint(CheckExpSizeBB);
743 Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
744 LHS: BiasedExp, RHS: Builder.getIntN(N: FloatWidth, C: ExponentBias + FPMantissaWidth),
745 Name: "exp.smaller.mantissa.width");
746 // We cannot determine whether this is a left shift or a right shift,
747 // so we mark the branch weights as unknown.
748 Value *CondBr2 =
749 Builder.CreateCondBr(Cond: ExpSmallerMantissaWidth, True: ExpSmallBB, False: ExpLargeBB);
750 applyProfMetadataIfEnabled(V: CondBr2, setMetadataCallback: [&](Instruction *Inst) {
751 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *Inst, DEBUG_TYPE, F);
752 });
753
754 // exp.small:
755 Builder.SetInsertPoint(ExpSmallBB);
756 Value *Sub13 = Builder.CreateSub(
757 LHS: Builder.getIntN(N: FloatWidth, C: ExponentBias + FPMantissaWidth), RHS: BiasedExp);
758 Value *ExpSmallRes =
759 Builder.CreateZExtOrTrunc(V: Builder.CreateLShr(LHS: Significand, RHS: Sub13), DestTy: IntTy);
760 if (IsSigned)
761 ExpSmallRes = Builder.CreateMul(LHS: ExpSmallRes, RHS: Sign);
762 Builder.CreateBr(Dest: End);
763
764 // exp.large:
765 Builder.SetInsertPoint(ExpLargeBB);
766 Value *Sub15 = Builder.CreateAdd(
767 LHS: BiasedExp,
768 RHS: ConstantInt::getSigned(
769 Ty: FloatIntTy, V: -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
770 Value *SignificandCast = Builder.CreateZExtOrTrunc(V: Significand, DestTy: IntTy);
771 Value *ExpLargeRes = Builder.CreateShl(
772 LHS: SignificandCast, RHS: Builder.CreateZExtOrTrunc(V: Sub15, DestTy: IntTy));
773 if (IsSigned)
774 ExpLargeRes = Builder.CreateMul(LHS: ExpLargeRes, RHS: Sign);
775 Builder.CreateBr(Dest: End);
776
777 // cleanup:
778 Builder.SetInsertPoint(TheBB: End, IP: End->begin());
779 PHINode *Retval0 = Builder.CreatePHI(Ty: FPToI->getType(), NumReservedValues: 3 + IsSaturating);
780
781 if (IsSaturating)
782 Retval0->addIncoming(V: Saturated, BB: SaturateBB);
783 Retval0->addIncoming(V: ExpSmallRes, BB: ExpSmallBB);
784 Retval0->addIncoming(V: ExpLargeRes, BB: ExpLargeBB);
785 Retval0->addIncoming(V: Builder.getIntN(N: BitWidth, C: 0), BB: Entry);
786
787 FPToI->replaceAllUsesWith(V: Retval0);
788 FPToI->dropAllReferences();
789 FPToI->eraseFromParent();
790}
791
792// clang-format off: preserve formatting of the following example
793
/// Generate code to convert an integer to a fp number, replacing S(U)IToFP
/// with the generated code. This currently generates code similarly to
/// compiler-rt's implementations. The expansion assumes that the integer
/// width is at least the fp width; narrower integers are extended first.
798///
799/// An example IR generated from compiler-rt/floatdisf.c looks like below:
800/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
801/// entry:
802/// %cmp = icmp eq i64 %a, 0
803/// br i1 %cmp, label %return, label %if.end
804///
805/// if.end: ; preds = %entry
806/// %shr = ashr i64 %a, 63
807/// %xor = xor i64 %shr, %a
808/// %sub = sub nsw i64 %xor, %shr
809/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
810/// %cast = trunc i64 %0 to i32
811/// %sub1 = sub nuw nsw i32 64, %cast
812/// %sub2 = xor i32 %cast, 63
813/// %cmp3 = icmp ult i32 %cast, 40
814/// br i1 %cmp3, label %if.then4, label %if.else
815///
816/// if.then4: ; preds = %if.end
817/// switch i32 %sub1, label %sw.default [
818/// i32 25, label %sw.bb
819/// i32 26, label %sw.epilog
820/// ]
821///
822/// sw.bb: ; preds = %if.then4
823/// %shl = shl i64 %sub, 1
824/// br label %sw.epilog
825///
826/// sw.default: ; preds = %if.then4
827/// %sub5 = sub nsw i64 38, %0
828/// %sh_prom = and i64 %sub5, 4294967295
829/// %shr6 = lshr i64 %sub, %sh_prom
830/// %shr9 = lshr i64 274877906943, %0
831/// %and = and i64 %shr9, %sub
832/// %cmp10 = icmp ne i64 %and, 0
833/// %conv11 = zext i1 %cmp10 to i64
834/// %or = or i64 %shr6, %conv11
835/// br label %sw.epilog
836///
/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ],
///                     [ %shl, %sw.bb ]
/// %1 = lshr i64 %a.addr.0, 2
/// %2 = and i64 %1, 1
/// %or16 = or i64 %2, %a.addr.0
/// %inc = add nsw i64 %or16, 1
/// %3 = and i64 %inc, 67108864
842/// %tobool.not = icmp eq i64 %3, 0
843/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
844/// %spec.select = ashr i64 %inc, %spec.select.v
845/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
846/// br label %if.end26
847///
848/// if.else: ; preds = %if.end
849/// %sub23 = add nuw nsw i64 %0, 4294967256
850/// %sh_prom24 = and i64 %sub23, 4294967295
851/// %shl25 = shl i64 %sub, %sh_prom24
852/// br label %if.end26
853///
854/// if.end26: ; preds = %sw.epilog,
855/// %if.else
856/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
857/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
858/// %conv27 = trunc i64 %shr to i32
859/// %and28 = and i32 %conv27, -2147483648
860/// %add = shl nuw nsw i32 %e.0, 23
861/// %shl29 = add nuw nsw i32 %add, 1065353216
862/// %conv31 = trunc i64 %a.addr.1 to i32
863/// %and32 = and i32 %conv31, 8388607
864/// %or30 = or i32 %and32, %and28
865/// %or33 = or i32 %or30, %shl29
866/// %4 = bitcast i32 %or33 to float
867/// br label %return
868///
869/// return: ; preds = %entry,
870/// %if.end26
871/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
872/// ret float %retval.0
873/// }
874///
875/// Replace integer to fp with generated code.
876static void expandIToFP(Instruction *IToFP) {
877 // clang-format on
878 IRBuilder<> Builder(IToFP);
879 auto *IntVal = IToFP->getOperand(i: 0);
880 IntegerType *IntTy = cast<IntegerType>(Val: IntVal->getType());
881
882 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
883 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
884 // fp80 conversion is implemented by conversion tp fp128 first following
885 // a fptrunc to fp80.
886 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
887 // FIXME: As there is no related builtins added in compliler-rt,
888 // here currently utilized the fp32 <-> fp16 lib calls to implement.
889 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
890 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
891 unsigned FloatWidth = PowerOf2Ceil(A: FPMantissaWidth);
892 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
893
894 // We're going to introduce branches on the value, so freeze it.
895 if (!isGuaranteedNotToBeUndefOrPoison(V: IntVal))
896 IntVal = Builder.CreateFreeze(V: IntVal);
897
898 // The expansion below assumes that int width >= float width. Zero or sign
899 // extend the integer accordingly.
900 if (BitWidth < FloatWidth) {
901 BitWidth = FloatWidth;
902 IntTy = Builder.getIntNTy(N: BitWidth);
903 IntVal = Builder.CreateIntCast(V: IntVal, DestTy: IntTy, isSigned: IsSigned);
904 }
905
906 Value *Temp1 =
907 Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: 1),
908 RHS: Builder.getIntN(N: BitWidth, C: FPMantissaWidth + 3));
909
910 BasicBlock *Entry = Builder.GetInsertBlock();
911 Function *F = Entry->getParent();
912 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
913 BasicBlock *End =
914 Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "itofp-return");
915 BasicBlock *IfEnd =
916 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end", Parent: F, InsertBefore: End);
917 BasicBlock *IfThen4 =
918 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then4", Parent: F, InsertBefore: End);
919 BasicBlock *SwBB =
920 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-bb", Parent: F, InsertBefore: End);
921 BasicBlock *SwDefault =
922 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-default", Parent: F, InsertBefore: End);
923 BasicBlock *SwEpilog =
924 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-epilog", Parent: F, InsertBefore: End);
925 BasicBlock *IfThen20 =
926 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then20", Parent: F, InsertBefore: End);
927 BasicBlock *IfElse =
928 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-else", Parent: F, InsertBefore: End);
929 BasicBlock *IfEnd26 =
930 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end26", Parent: F, InsertBefore: End);
931
932 Entry->getTerminator()->eraseFromParent();
933
934 Function *CTLZ =
935 Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: Intrinsic::ctlz, OverloadTys: IntTy);
936 ConstantInt *True = Builder.getTrue();
937
938 // entry:
939 Builder.SetInsertPoint(Entry);
940 // We assume that the zero is an unlikely input case, so the branch to 'End'
941 // is the unlikely path.
942 Value *Cmp = Builder.CreateICmpEQ(LHS: IntVal, RHS: ConstantInt::getSigned(Ty: IntTy, V: 0));
943 Value *CondBrEntry = Builder.CreateCondBr(Cond: Cmp, True: End, False: IfEnd);
944 applyProfMetadataIfEnabled(V: CondBrEntry, setMetadataCallback: [&](Instruction *Inst) {
945 Inst->setMetadata(
946 KindID: LLVMContext::MD_prof,
947 Node: MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
948 });
949
950 // if.end:
951 Builder.SetInsertPoint(IfEnd);
952 Value *Shr =
953 Builder.CreateAShr(LHS: IntVal, RHS: Builder.getIntN(N: BitWidth, C: BitWidth - 1));
954 Value *Xor = Builder.CreateXor(LHS: Shr, RHS: IntVal);
955 Value *Sub = Builder.CreateSub(LHS: Xor, RHS: Shr);
956 Value *Call = Builder.CreateCall(Callee: CTLZ, Args: {IsSigned ? Sub : IntVal, True});
957 Value *Cast = Builder.CreateTrunc(V: Call, DestTy: Builder.getInt32Ty());
958 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
959 Value *Sub1 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth),
960 RHS: FloatWidth == 128 ? Call : Cast);
961 Value *Sub2 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - 1),
962 RHS: FloatWidth == 128 ? Call : Cast);
963 Value *Cmp3 = Builder.CreateICmpSGT(
964 LHS: Sub1, RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 1));
965 // This branch handles the rare case where rounding the mantissa causes a
966 // carry-out at the most significant bit, necessitating an increment of the
967 // exponent. This is rare case, so the True path is mared as likely.
968 Value *CondBrIfEnd = Builder.CreateCondBr(Cond: Cmp3, True: IfThen4, False: IfElse);
969 applyProfMetadataIfEnabled(V: CondBrIfEnd, setMetadataCallback: [&](Instruction *Inst) {
970 Inst->setMetadata(
971 KindID: LLVMContext::MD_prof,
972 Node: MDBuilder(Inst->getContext()).createLikelyBranchWeights());
973 });
974
975 // if.then4:
976 Builder.SetInsertPoint(IfThen4);
977 SwitchInst *SI = Builder.CreateSwitch(V: Sub1, Dest: SwDefault);
978 SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 2), Dest: SwBB);
979 SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 3), Dest: SwEpilog);
980 // Add branch weights to the SwitchInst. The weights are provided for the
981 // default case first (SwDefault), followed by each explicit case in the
982 // order they were added (SwBB, then SwEpilog). Because the following cases
983 // are rare, the defalut case is given a likely weight.
984 if (!ProfcheckDisableMetadataFixes) {
985 SI->setMetadata(
986 KindID: LLVMContext::MD_prof,
987 Node: MDBuilder(SI->getContext())
988 .createBranchWeights(Weights: {llvm::MDBuilder::kLikelyBranchWeight,
989 llvm::MDBuilder::kUnlikelyBranchWeight,
990 llvm::MDBuilder::kUnlikelyBranchWeight}));
991 }
992
993 // sw.bb:
994 Builder.SetInsertPoint(SwBB);
995 Value *Shl =
996 Builder.CreateShl(LHS: IsSigned ? Sub : IntVal, RHS: Builder.getIntN(N: BitWidth, C: 1));
997 Builder.CreateBr(Dest: SwEpilog);
998
999 // sw.default:
1000 Builder.SetInsertPoint(SwDefault);
1001 Value *Sub5 = Builder.CreateSub(
1002 LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - FPMantissaWidth - 3),
1003 RHS: FloatWidth == 128 ? Call : Cast);
1004 Value *ShProm = Builder.CreateZExt(V: Sub5, DestTy: IntTy);
1005 Value *Shr6 = Builder.CreateLShr(LHS: IsSigned ? Sub : IntVal,
1006 RHS: FloatWidth == 128 ? Sub5 : ShProm);
1007 Value *Sub8 =
1008 Builder.CreateAdd(LHS: FloatWidth == 128 ? Call : Cast,
1009 RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 3));
1010 Value *ShProm9 = Builder.CreateZExt(V: Sub8, DestTy: IntTy);
1011 Value *Shr9 = Builder.CreateLShr(LHS: ConstantInt::getSigned(Ty: IntTy, V: -1),
1012 RHS: FloatWidth == 128 ? Sub8 : ShProm9);
1013 Value *And = Builder.CreateAnd(LHS: Shr9, RHS: IsSigned ? Sub : IntVal);
1014 Value *Cmp10 = Builder.CreateICmpNE(LHS: And, RHS: Builder.getIntN(N: BitWidth, C: 0));
1015 Value *Conv11 = Builder.CreateZExt(V: Cmp10, DestTy: IntTy);
1016 Value *Or = Builder.CreateOr(LHS: Shr6, RHS: Conv11);
1017 Builder.CreateBr(Dest: SwEpilog);
1018
1019 // sw.epilog:
1020 Builder.SetInsertPoint(SwEpilog);
1021 PHINode *AAddr0 = Builder.CreatePHI(Ty: IntTy, NumReservedValues: 3);
1022 AAddr0->addIncoming(V: Or, BB: SwDefault);
1023 AAddr0->addIncoming(V: IsSigned ? Sub : IntVal, BB: IfThen4);
1024 AAddr0->addIncoming(V: Shl, BB: SwBB);
1025 Value *A0 = Builder.CreateTrunc(V: AAddr0, DestTy: Builder.getInt32Ty());
1026 Value *A1 = Builder.CreateLShr(LHS: A0, RHS: Builder.getInt32(C: 2));
1027 Value *A2 = Builder.CreateAnd(LHS: A1, RHS: Builder.getInt32(C: 1));
1028 Value *Conv16 = Builder.CreateZExt(V: A2, DestTy: IntTy);
1029 Value *Or17 = Builder.CreateOr(LHS: AAddr0, RHS: Conv16);
1030 Value *Inc = Builder.CreateAdd(LHS: Or17, RHS: Builder.getIntN(N: BitWidth, C: 1));
1031 Value *Shr18 = nullptr;
1032 if (IsSigned)
1033 Shr18 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 2));
1034 else
1035 Shr18 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 2));
1036 Value *A3 = Builder.CreateAnd(LHS: Inc, RHS: Temp1, Name: "a3");
1037 Value *PosOrNeg = Builder.CreateICmpEQ(LHS: A3, RHS: Builder.getIntN(N: BitWidth, C: 0));
1038 Value *ExtractT60 = Builder.CreateTrunc(V: Shr18, DestTy: Builder.getIntNTy(N: FloatWidth));
1039 Value *Extract63 = Builder.CreateLShr(LHS: Shr18, RHS: Builder.getIntN(N: BitWidth, C: 32));
1040 Value *ExtractT64 = nullptr;
1041 if (FloatWidth > 80)
1042 ExtractT64 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getInt64Ty());
1043 else
1044 ExtractT64 = Builder.CreateTrunc(V: Extract63, DestTy: Builder.getInt32Ty());
1045 // Rounding usually keeps the exponent within its current magnitude and
1046 // overflow is rare. The False path is unlikely to be taken.
1047 Value *CondBrSwEpilog = Builder.CreateCondBr(Cond: PosOrNeg, True: IfEnd26, False: IfThen20);
1048 applyProfMetadataIfEnabled(V: CondBrSwEpilog, setMetadataCallback: [&](Instruction *Inst) {
1049 Inst->setMetadata(
1050 KindID: LLVMContext::MD_prof,
1051 Node: MDBuilder(Inst->getContext()).createLikelyBranchWeights());
1052 });
1053
1054 // if.then20
1055 Builder.SetInsertPoint(IfThen20);
1056 Value *Shr21 = nullptr;
1057 if (IsSigned)
1058 Shr21 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 3));
1059 else
1060 Shr21 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 3));
1061 Value *ExtractT = Builder.CreateTrunc(V: Shr21, DestTy: Builder.getIntNTy(N: FloatWidth));
1062 Value *Extract = Builder.CreateLShr(LHS: Shr21, RHS: Builder.getIntN(N: BitWidth, C: 32));
1063 Value *ExtractT62 = nullptr;
1064 if (FloatWidth > 80)
1065 ExtractT62 = Builder.CreateTrunc(V: Sub1, DestTy: Builder.getInt64Ty());
1066 else
1067 ExtractT62 = Builder.CreateTrunc(V: Extract, DestTy: Builder.getInt32Ty());
1068 Builder.CreateBr(Dest: IfEnd26);
1069
1070 // if.else:
1071 Builder.SetInsertPoint(IfElse);
1072 Value *Sub24 = Builder.CreateAdd(
1073 LHS: FloatWidth == 128 ? Call : Cast,
1074 RHS: ConstantInt::getSigned(Ty: Builder.getIntNTy(N: BitWidthNew),
1075 V: -(int)(BitWidth - FPMantissaWidth - 1)));
1076 Value *ShProm25 = Builder.CreateZExt(V: Sub24, DestTy: IntTy);
1077 Value *Shl26 = Builder.CreateShl(LHS: IsSigned ? Sub : IntVal,
1078 RHS: FloatWidth == 128 ? Sub24 : ShProm25);
1079 Value *ExtractT61 = Builder.CreateTrunc(V: Shl26, DestTy: Builder.getIntNTy(N: FloatWidth));
1080 Value *Extract65 = Builder.CreateLShr(LHS: Shl26, RHS: Builder.getIntN(N: BitWidth, C: 32));
1081 Value *ExtractT66 = nullptr;
1082 if (FloatWidth > 80)
1083 ExtractT66 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getInt64Ty());
1084 else
1085 ExtractT66 = Builder.CreateTrunc(V: Extract65, DestTy: Builder.getInt32Ty());
1086 Builder.CreateBr(Dest: IfEnd26);
1087
1088 // if.end26:
1089 Builder.SetInsertPoint(IfEnd26);
1090 PHINode *AAddr1Off0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth), NumReservedValues: 3);
1091 AAddr1Off0->addIncoming(V: ExtractT, BB: IfThen20);
1092 AAddr1Off0->addIncoming(V: ExtractT60, BB: SwEpilog);
1093 AAddr1Off0->addIncoming(V: ExtractT61, BB: IfElse);
1094 PHINode *AAddr1Off32 = nullptr;
1095 if (FloatWidth > 32) {
1096 AAddr1Off32 =
1097 Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth > 80 ? 64 : 32), NumReservedValues: 3);
1098 AAddr1Off32->addIncoming(V: ExtractT62, BB: IfThen20);
1099 AAddr1Off32->addIncoming(V: ExtractT64, BB: SwEpilog);
1100 AAddr1Off32->addIncoming(V: ExtractT66, BB: IfElse);
1101 }
1102 PHINode *E0 = nullptr;
1103 if (FloatWidth <= 80) {
1104 E0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: BitWidthNew), NumReservedValues: 3);
1105 E0->addIncoming(V: Sub1, BB: IfThen20);
1106 E0->addIncoming(V: Sub2, BB: SwEpilog);
1107 E0->addIncoming(V: Sub2, BB: IfElse);
1108 }
1109 Value *And29 = nullptr;
1110 if (FloatWidth > 80) {
1111 Value *Temp2 = Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: 1),
1112 RHS: Builder.getIntN(N: BitWidth, C: 63));
1113 And29 = Builder.CreateAnd(LHS: Shr, RHS: Temp2, Name: "and29");
1114 } else {
1115 Value *Conv28 = Builder.CreateTrunc(V: Shr, DestTy: Builder.getInt32Ty());
1116 And29 = Builder.CreateAnd(
1117 LHS: Conv28, RHS: ConstantInt::get(Context&: Builder.getContext(), V: APInt::getSignMask(BitWidth: 32)));
1118 }
1119 unsigned TempMod = FPMantissaWidth % 32;
1120 Value *And34 = nullptr;
1121 Value *Shl30 = nullptr;
1122 if (FloatWidth > 80) {
1123 TempMod += 32;
1124 Value *Add = Builder.CreateShl(LHS: AAddr1Off32, RHS: Builder.getInt64(C: TempMod));
1125 Shl30 = Builder.CreateAdd(
1126 LHS: Add, RHS: Builder.getInt64(C: ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
1127 And34 = Builder.CreateZExt(V: Shl30, DestTy: Builder.getInt128Ty());
1128 } else {
1129 Value *Add = Builder.CreateShl(LHS: E0, RHS: Builder.getInt32(C: TempMod));
1130 Shl30 = Builder.CreateAdd(
1131 LHS: Add, RHS: Builder.getInt32(C: ((1 << (30 - TempMod)) - 1) << TempMod));
1132 And34 = Builder.CreateAnd(LHS: FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
1133 RHS: Builder.getInt32(C: (1 << TempMod) - 1));
1134 }
1135 Value *Or35 = nullptr;
1136 if (FloatWidth > 80) {
1137 Value *And29Trunc = Builder.CreateTrunc(V: And29, DestTy: Builder.getInt128Ty());
1138 Value *Or31 = Builder.CreateOr(LHS: And29Trunc, RHS: And34);
1139 Value *Or34 = Builder.CreateShl(LHS: Or31, RHS: Builder.getIntN(N: 128, C: 64));
1140 Value *Temp3 = Builder.CreateShl(LHS: Builder.getIntN(N: 128, C: 1),
1141 RHS: Builder.getIntN(N: 128, C: FPMantissaWidth));
1142 Value *Temp4 = Builder.CreateSub(LHS: Temp3, RHS: Builder.getIntN(N: 128, C: 1));
1143 Value *A6 = Builder.CreateAnd(LHS: AAddr1Off0, RHS: Temp4);
1144 Or35 = Builder.CreateOr(LHS: Or34, RHS: A6);
1145 } else {
1146 Value *Or31 = Builder.CreateOr(LHS: And34, RHS: And29);
1147 Or35 = Builder.CreateOr(LHS: IsSigned ? Or31 : And34, RHS: Shl30);
1148 }
1149 Value *A4 = nullptr;
1150 if (IToFP->getType()->isDoubleTy()) {
1151 Value *ZExt1 = Builder.CreateZExt(V: Or35, DestTy: Builder.getIntNTy(N: FloatWidth));
1152 Value *Shl1 = Builder.CreateShl(LHS: ZExt1, RHS: Builder.getIntN(N: FloatWidth, C: 32));
1153 Value *And1 =
1154 Builder.CreateAnd(LHS: AAddr1Off0, RHS: Builder.getIntN(N: FloatWidth, C: 0xFFFFFFFF));
1155 Value *Or1 = Builder.CreateOr(LHS: Shl1, RHS: And1);
1156 A4 = Builder.CreateBitCast(V: Or1, DestTy: IToFP->getType());
1157 } else if (IToFP->getType()->isX86_FP80Ty()) {
1158 Value *A40 =
1159 Builder.CreateBitCast(V: Or35, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
1160 A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
1161 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
1162 // Deal with "half" situation. This is a workaround since we don't have
1163 // floattihf.c currently as referring.
1164 Value *A40 =
1165 Builder.CreateBitCast(V: Or35, DestTy: Type::getFloatTy(C&: Builder.getContext()));
1166 A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
1167 } else // float type
1168 A4 = Builder.CreateBitCast(V: Or35, DestTy: IToFP->getType());
1169
1170 // Sub2 is the unbiased exponent (the index of the top set bit in the input).
1171 // The exponent arithmetic above wraps to garbage instead of inf once it
1172 // overflows the exponent field, so saturate to a correctly-signed infinity
1173 // when Sub2 reaches 1 << (ExponentWidth - 1). Sub2 is at most BitWidth - 1,
1174 // so skip the check entirely when even that can't reach the threshold.
1175 // (Values that round *up* into inf, e.g. 2^n - 1, keep Sub2 = BitWidth - 1;
1176 // these are handled by the conversion's own rounding, not by this
1177 // saturation.)
1178 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
1179 uint64_t MinInfExp = 1ULL << (ExponentWidth - 1);
1180 if (BitWidth - 1 >= MinInfExp) {
1181 Value *MinInfExpVal = Builder.getIntN(N: BitWidthNew, C: MinInfExp);
1182 Value *Overflow = Builder.CreateICmpUGE(LHS: Sub2, RHS: MinInfExpVal);
1183 Value *Inf = ConstantFP::getInfinity(Ty: IToFP->getType(), /*Negative=*/false);
1184 if (IsSigned) {
1185 Value *NegInf =
1186 ConstantFP::getInfinity(Ty: IToFP->getType(), /*Negative=*/true);
1187 Value *IsNeg =
1188 Builder.CreateICmpSLT(LHS: IntVal, RHS: ConstantInt::getNullValue(Ty: IntTy));
1189 Inf = Builder.CreateSelectWithUnknownProfile(C: IsNeg, True: NegInf, False: Inf,
1190 DEBUG_TYPE);
1191 }
1192 A4 = Builder.CreateSelect(C: Overflow, True: Inf, False: A4);
1193 // We consider overflow to be an unlikely case.
1194 applyProfMetadataIfEnabled(V: A4, setMetadataCallback: [&](Instruction *Inst) {
1195 Inst->setMetadata(
1196 KindID: LLVMContext::MD_prof,
1197 Node: MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
1198 });
1199 }
1200 Builder.CreateBr(Dest: End);
1201
1202 // return:
1203 Builder.SetInsertPoint(TheBB: End, IP: End->begin());
1204 PHINode *Retval0 = Builder.CreatePHI(Ty: IToFP->getType(), NumReservedValues: 2);
1205 Retval0->addIncoming(V: A4, BB: IfEnd26);
1206 Retval0->addIncoming(V: ConstantFP::getZero(Ty: IToFP->getType(), Negative: false), BB: Entry);
1207
1208 IToFP->replaceAllUsesWith(V: Retval0);
1209 IToFP->dropAllReferences();
1210 IToFP->eraseFromParent();
1211}
1212
1213static void scalarize(Instruction *I,
1214 SmallVectorImpl<Instruction *> &Worklist) {
1215 VectorType *VTy = cast<FixedVectorType>(Val: I->getType());
1216
1217 IRBuilder<> Builder(I);
1218
1219 unsigned NumElements = VTy->getElementCount().getFixedValue();
1220 Value *Result = PoisonValue::get(T: VTy);
1221 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
1222 Value *Ext = Builder.CreateExtractElement(Vec: I->getOperand(i: 0), Idx);
1223
1224 Value *NewOp = nullptr;
1225 if (auto *BinOp = dyn_cast<BinaryOperator>(Val: I))
1226 NewOp = Builder.CreateBinOp(
1227 Opc: BinOp->getOpcode(), LHS: Ext,
1228 RHS: Builder.CreateExtractElement(Vec: I->getOperand(i: 1), Idx));
1229 else if (auto *CastI = dyn_cast<CastInst>(Val: I))
1230 NewOp = Builder.CreateCast(Op: CastI->getOpcode(), V: Ext,
1231 DestTy: I->getType()->getScalarType());
1232 else if (auto *II = dyn_cast<IntrinsicInst>(Val: I)) {
1233 assert(II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1234 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1235 NewOp = Builder.CreateIntrinsic(RetTy: I->getType()->getScalarType(),
1236 ID: II->getIntrinsicID(), Args: {Ext});
1237 } else
1238 llvm_unreachable("Unsupported instruction type");
1239
1240 Result = Builder.CreateInsertElement(Vec: Result, NewElt: NewOp, Idx);
1241 if (auto *ScalarizedI = dyn_cast<Instruction>(Val: NewOp)) {
1242 ScalarizedI->copyIRFlags(V: I, IncludeWrapFlags: true);
1243 Worklist.push_back(Elt: ScalarizedI);
1244 }
1245 }
1246
1247 I->replaceAllUsesWith(V: Result);
1248 I->dropAllReferences();
1249 I->eraseFromParent();
1250}
1251
1252static void addToWorklist(Instruction &I,
1253 SmallVector<Instruction *, 4> &Worklist) {
1254 if (I.getOperand(i: 0)->getType()->isVectorTy())
1255 scalarize(I: &I, Worklist);
1256 else
1257 Worklist.push_back(Elt: &I);
1258}
1259
1260static bool runImpl(Function &F, const TargetLowering &TLI,
1261 const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) {
1262 SmallVector<Instruction *, 4> Worklist;
1263
1264 unsigned MaxLegalFpConvertBitWidth =
1265 TLI.getMaxLargeFPConvertBitWidthSupported();
1266 if (ExpandFpConvertBits != IntegerType::MAX_INT_BITS)
1267 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
1268
1269 unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
1270 if (ExpandDivRemBits != IntegerType::MAX_INT_BITS)
1271 MaxLegalDivRemBitWidth = ExpandDivRemBits;
1272
1273 bool DisableExpandLargeFp =
1274 MaxLegalFpConvertBitWidth >= IntegerType::MAX_INT_BITS;
1275 bool DisableExpandLargeDivRem =
1276 MaxLegalDivRemBitWidth >= IntegerType::MAX_INT_BITS;
1277 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1278
1279 if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
1280 return false;
1281
1282 auto ShouldHandleInst = [&](Instruction &I) {
1283 Type *Ty = I.getType();
1284 // TODO: This pass doesn't handle scalable vectors.
1285 if (Ty->isScalableTy())
1286 return false;
1287
1288 switch (I.getOpcode()) {
1289 case Instruction::FRem:
1290 return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1291 case Instruction::FPToUI:
1292 case Instruction::FPToSI:
1293 return !DisableExpandLargeFp &&
1294 cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
1295 MaxLegalFpConvertBitWidth;
1296 case Instruction::UIToFP:
1297 case Instruction::SIToFP:
1298 return !DisableExpandLargeFp &&
1299 cast<IntegerType>(Val: I.getOperand(i: 0)->getType()->getScalarType())
1300 ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1301 case Instruction::UDiv:
1302 case Instruction::SDiv:
1303 case Instruction::URem:
1304 case Instruction::SRem:
1305 // Power-of-2 divisors are handled inside the expansion (via efficient
1306 // shift/mask sequences) rather than being excluded here, so that
1307 // backends that cannot lower wide div/rem even for powers of two
1308 // (e.g. when DAGCombiner is disabled) still get valid lowered code.
1309 return !DisableExpandLargeDivRem &&
1310 cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
1311 MaxLegalDivRemBitWidth;
1312 case Instruction::Call: {
1313 auto *II = dyn_cast<IntrinsicInst>(Val: &I);
1314 if (II && (II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1315 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
1316 return !DisableExpandLargeFp &&
1317 cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
1318 MaxLegalFpConvertBitWidth;
1319 }
1320 return false;
1321 }
1322 }
1323
1324 return false;
1325 };
1326
1327 bool Modified = false;
1328 for (auto It = inst_begin(F: &F), End = inst_end(F); It != End;) {
1329 Instruction &I = *It++;
1330 if (!ShouldHandleInst(I))
1331 continue;
1332
1333 addToWorklist(I, Worklist);
1334 Modified = true;
1335 }
1336
1337 while (!Worklist.empty()) {
1338 Instruction *I = Worklist.pop_back_val();
1339
1340 switch (I->getOpcode()) {
1341 case Instruction::FRem: {
1342 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1343 if (AC) {
1344 auto Res = std::make_optional<SimplifyQuery>(
1345 args: I->getModule()->getDataLayout(), args&: I);
1346 Res->AC = AC;
1347 return Res;
1348 }
1349 return {};
1350 }();
1351
1352 expandFRem(I&: cast<BinaryOperator>(Val&: *I), SQ);
1353 break;
1354 }
1355
1356 case Instruction::FPToUI:
1357 expandFPToI(FPToI: I, /*IsSaturating=*/false, /*IsSigned=*/false);
1358 break;
1359 case Instruction::FPToSI:
1360 expandFPToI(FPToI: I, /*IsSaturating=*/false, /*IsSigned=*/true);
1361 break;
1362
1363 case Instruction::UIToFP:
1364 case Instruction::SIToFP:
1365 expandIToFP(IToFP: I);
1366 break;
1367
1368 case Instruction::UDiv:
1369 case Instruction::SDiv:
1370 case Instruction::URem:
1371 case Instruction::SRem: {
1372 auto *BO = cast<BinaryOperator>(Val: I);
1373 // TODO: isConstantPowerOfTwo does not handle vector constants, so
1374 // vector div/rem by a power-of-2 splat goes through the generic path.
1375 if (isConstantPowerOfTwo(V: BO->getOperand(i_nocapture: 1), SignedOp: isSigned(Opcode: BO->getOpcode()))) {
1376 expandPow2DivRem(BO);
1377 } else {
1378 unsigned Opc = BO->getOpcode();
1379 if (Opc == Instruction::UDiv || Opc == Instruction::SDiv)
1380 expandDivision(Div: BO);
1381 else
1382 expandRemainder(Rem: BO);
1383 }
1384 break;
1385 }
1386 case Instruction::Call: {
1387 auto *II = cast<IntrinsicInst>(Val: I);
1388 assert(II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1389 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1390 expandFPToI(FPToI: I, /*IsSaturating=*/true,
1391 /*IsSigned=*/II->getIntrinsicID() == Intrinsic::fptosi_sat);
1392 break;
1393 }
1394 }
1395 }
1396
1397 return Modified;
1398}
1399
1400namespace {
1401class ExpandIRInstsLegacyPass : public FunctionPass {
1402 CodeGenOptLevel OptLevel;
1403
1404public:
1405 static char ID;
1406
1407 ExpandIRInstsLegacyPass(CodeGenOptLevel OptLevel)
1408 : FunctionPass(ID), OptLevel(OptLevel) {}
1409
1410 ExpandIRInstsLegacyPass() : ExpandIRInstsLegacyPass(CodeGenOptLevel::None) {}
1411
1412 bool runOnFunction(Function &F) override {
1413 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1414 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);
1415 auto *TLI = Subtarget->getTargetLowering();
1416 AssumptionCache *AC = nullptr;
1417
1418 const LibcallLoweringInfo &Libcalls =
1419 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1420 M: *F.getParent(), Subtarget: *Subtarget);
1421
1422 if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
1423 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1424 return runImpl(F, TLI: *TLI, Libcalls, AC);
1425 }
1426
1427 void getAnalysisUsage(AnalysisUsage &AU) const override {
1428 AU.addRequired<LibcallLoweringInfoWrapper>();
1429 AU.addRequired<TargetPassConfig>();
1430 if (OptLevel != CodeGenOptLevel::None)
1431 AU.addRequired<AssumptionCacheTracker>();
1432 AU.addPreserved<AAResultsWrapperPass>();
1433 AU.addPreserved<GlobalsAAWrapperPass>();
1434 AU.addRequired<LibcallLoweringInfoWrapper>();
1435 }
1436};
1437} // namespace
1438
1439ExpandIRInstsPass::ExpandIRInstsPass(const TargetMachine &TM,
1440 CodeGenOptLevel OptLevel)
1441 : TM(&TM), OptLevel(OptLevel) {}
1442
1443void ExpandIRInstsPass::printPipeline(
1444 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1445 static_cast<PassInfoMixin<ExpandIRInstsPass> *>(this)->printPipeline(
1446 OS, MapClassName2PassName);
1447 OS << '<';
1448 OS << "O" << (int)OptLevel;
1449 OS << '>';
1450}
1451
1452PreservedAnalyses ExpandIRInstsPass::run(Function &F,
1453 FunctionAnalysisManager &FAM) {
1454 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
1455 auto &TLI = *STI->getTargetLowering();
1456 AssumptionCache *AC = nullptr;
1457 if (OptLevel != CodeGenOptLevel::None)
1458 AC = &FAM.getResult<AssumptionAnalysis>(IR&: F);
1459
1460 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
1461
1462 const LibcallLoweringModuleAnalysisResult *LibcallLowering =
1463 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(IR&: *F.getParent());
1464
1465 if (!LibcallLowering) {
1466 F.getContext().emitError(ErrorStr: "'" + LibcallLoweringModuleAnalysis::name() +
1467 "' analysis required");
1468 return PreservedAnalyses::all();
1469 }
1470
1471 const LibcallLoweringInfo &Libcalls =
1472 LibcallLowering->getLibcallLowering(Subtarget: *STI);
1473
1474 return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none()
1475 : PreservedAnalyses::all();
1476}
1477
1478char ExpandIRInstsLegacyPass::ID = 0;
1479INITIALIZE_PASS_BEGIN(ExpandIRInstsLegacyPass, "expand-ir-insts",
1480 "Expand certain fp instructions", false, false)
1481INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
1482INITIALIZE_PASS_END(ExpandIRInstsLegacyPass, "expand-ir-insts",
1483 "Expand IR instructions", false, false)
1484
1485FunctionPass *llvm::createExpandIRInstsPass(CodeGenOptLevel OptLevel) {
1486 return new ExpandIRInstsLegacyPass(OptLevel);
1487}
1488