1//===--- ExpandIRInsts.cpp - Expand IR instructions -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass expands certain instructions at the IR level.
9//
10// The following expansions are implemented:
11// - Expansion of ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp
12// .. to’ instructions with a bitwidth above a threshold. This is
// useful for targets like x86_64 that cannot lower fp conversions
14// with more than 128 bits.
15//
16// - Expansion of ‘frem‘ for types MVT::f16, MVT::f32, and MVT::f64 for
17// targets which use "Expand" as the legalization action for the
18// corresponding type.
19//
20// - Expansion of ‘udiv‘, ‘sdiv‘, ‘urem‘, and ‘srem‘ instructions with
21// a bitwidth above a threshold into a call to auto-generated
22// functions. This is useful for targets like x86_64 that cannot
23// lower divisions with more than 128 bits or targets like x86_32 that
24// cannot lower divisions with more than 64 bits.
25//
26// Instructions with vector types are scalarized first if their scalar
27// types can be expanded. Scalable vector types are not supported.
28//===----------------------------------------------------------------------===//
29
30#include "llvm/CodeGen/ExpandIRInsts.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/Analysis/AssumptionCache.h"
33#include "llvm/Analysis/GlobalsModRef.h"
34#include "llvm/Analysis/SimplifyQuery.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/CodeGen/ISDOpcodes.h"
37#include "llvm/CodeGen/Passes.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/TargetPassConfig.h"
40#include "llvm/CodeGen/TargetSubtargetInfo.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstIterator.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/InitializePasses.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/CommandLine.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Target/TargetMachine.h"
51#include "llvm/Transforms/Utils/BasicBlockUtils.h"
52#include "llvm/Transforms/Utils/IntegerDivision.h"
53#include <llvm/Support/Casting.h>
54#include <optional>
55
56#define DEBUG_TYPE "expand-ir-insts"
57
58using namespace llvm;
59
// Both thresholds default to MAX_INT_BITS, which effectively disables the
// corresponding expansion unless a smaller limit is requested on the command
// line. NOTE(review): the pass may also be configured through other means not
// visible in this chunk — confirm before relying on the default being "off".
static cl::opt<unsigned>
    ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
                        cl::init(Val: llvm::IntegerType::MAX_INT_BITS),
                        cl::desc("fp convert instructions on integers with "
                                 "more than <N> bits are expanded."));

static cl::opt<unsigned>
    ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
                     cl::init(Val: llvm::IntegerType::MAX_INT_BITS),
                     cl::desc("div and rem instructions on integers with "
                              "more than <N> bits are expanded."));
71
72namespace {
73bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) {
74 auto *C = dyn_cast<ConstantInt>(Val: V);
75 if (!C)
76 return false;
77
78 APInt Val = C->getValue();
79 if (SignedOp && Val.isNegative())
80 Val = -Val;
81 return Val.isPowerOf2();
82}
83
84bool isSigned(unsigned int Opcode) {
85 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
86}
87
88/// For signed div/rem by a power of 2, compute the bias-adjusted dividend:
89/// Sign = ashr X, (BitWidth - 1) -- 0 or -1
90/// Bias = lshr Sign, (BitWidth - ShiftAmt) -- 0 or 2^ShiftAmt - 1
91/// Adjusted = add X, Bias
92/// The bias adds (2^ShiftAmt - 1) for negative X, correcting rounding towards
93/// zero (instead of towards -inf that a plain ashr would give).
94/// The lshr form is used instead of 'and' to avoid large immediate constants.
95static Value *addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth,
96 unsigned ShiftAmt) {
97 assert(ShiftAmt > 0 && ShiftAmt < BitWidth &&
98 "ShiftAmt out of range; callers should handle ShiftAmt == 0");
99 Value *Sign = Builder.CreateAShr(LHS: X, RHS: BitWidth - 1, Name: "sign");
100 Value *Bias = Builder.CreateLShr(LHS: Sign, RHS: BitWidth - ShiftAmt, Name: "bias");
101 return Builder.CreateAdd(LHS: X, RHS: Bias, Name: "adjusted");
102}
103
/// Expand division or remainder by a power-of-2 constant.
/// Division (let C = log2(|divisor|)):
///   udiv X, 2^C       -> lshr X, C
///   sdiv X, 2^C       -> ashr (add X, Bias), C  (Bias corrects rounding)
///   sdiv exact X, 2^C -> ashr exact X, C        (no bias needed)
/// For negative power-of-2 divisors, the division result is negated.
/// Remainder (let C = log2(|divisor|)):
///   urem X, 2^C -> and X, (2^C - 1)
///   srem X, 2^C -> sub X, (shl (ashr (add X, Bias), C), C)
static void expandPow2DivRem(BinaryOperator *BO) {
  LLVM_DEBUG(dbgs() << "Expanding instruction: " << *BO << '\n');

  unsigned Opcode = BO->getOpcode();
  bool IsDiv = (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv);
  bool IsSigned = isSigned(Opcode);
  // isExact() is only valid for div.
  bool IsExact = IsDiv && BO->isExact();

  assert(isConstantPowerOfTwo(BO->getOperand(1), IsSigned) &&
         "Expected power-of-2 constant divisor");

  Value *X = BO->getOperand(i_nocapture: 0);
  auto *C = cast<ConstantInt>(Val: BO->getOperand(i_nocapture: 1));
  Type *Ty = BO->getType();
  unsigned BitWidth = Ty->getIntegerBitWidth();

  APInt DivisorVal = C->getValue();
  bool IsNegativeDivisor = IsSigned && DivisorVal.isNegative();
  // Use countr_zero() to get the shift amount directly from the bit pattern.
  // This works correctly for both positive and negative powers of 2, including
  // INT_MIN, without needing to negate the value first.
  unsigned ShiftAmt = DivisorVal.countr_zero();

  IRBuilder<> Builder(BO);
  Value *Result;

  if (ShiftAmt == 0) {
    // Div by 1/-1: X / 1 = X, X / -1 = -X.
    // Rem by 1/-1: always 0.
    if (IsDiv)
      Result = IsNegativeDivisor ? Builder.CreateNeg(V: X) : X;
    else
      Result = ConstantInt::get(Ty, V: 0);
  } else if (IsSigned) {
    // The signed expansion uses X multiple times (bias computation, shift,
    // and sub for remainder). Freeze X to ensure consistent behavior if it is
    // undef/poison. For exact division, no bias is needed and X is used only
    // once, so freeze is unnecessary.
    if (!IsExact && !isGuaranteedNotToBeUndefOrPoison(V: X))
      X = Builder.CreateFreeze(V: X, Name: X->getName() + ".fr");
    // For exact division, no bias is needed since there's no rounding.
    Value *Dividend =
        IsExact ? X : addSignedBias(Builder, X, BitWidth, ShiftAmt);
    Value *Quotient = Builder.CreateAShr(
        LHS: Dividend, RHS: ShiftAmt, Name: IsDiv && IsNegativeDivisor ? "pre.neg" : "shifted",
        isExact: IsExact);
    if (IsDiv) {
      Result = IsNegativeDivisor ? Builder.CreateNeg(V: Quotient) : Quotient;
    } else {
      // Rem = X - (Quotient << ShiftAmt):
      // clear lower ShiftAmt bits via round-trip shift, then subtract.
      Value *Truncated = Builder.CreateShl(LHS: Quotient, RHS: ShiftAmt, Name: "truncated");
      Result = Builder.CreateSub(LHS: X, RHS: Truncated);
    }
  } else {
    if (IsDiv) {
      Result = Builder.CreateLShr(LHS: X, RHS: ShiftAmt, Name: "", isExact: IsExact);
    } else {
      APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShiftAmt);
      Result = Builder.CreateAnd(LHS: X, RHS: ConstantInt::get(Ty, V: Mask));
    }
  }

  // Rewire all users to the expansion and delete the original instruction.
  // Only steal the name when the expansion produced a new instruction; when
  // the division collapsed to X itself (udiv/sdiv by 1), X keeps its name,
  // and constant results have no name to take.
  BO->replaceAllUsesWith(V: Result);
  if (Result != X)
    if (auto *RI = dyn_cast<Instruction>(Val: Result))
      RI->takeName(V: BO);
  BO->dropAllReferences();
  BO->eraseFromParent();
}
184
/// This class implements a precise expansion of the frem instruction.
/// The generated code is based on the fmod implementation in the AMD device
/// libs.
class FRemExpander {
  /// The IRBuilder to use for the expansion.
  IRBuilder<> &B;

  /// Floating point type of the return value and the arguments of the FRem
  /// instructions that should be expanded.
  Type *FremTy;

  /// Floating point type to use for the computation. This may be
  /// wider than the \p FremTy.
  Type *ComputeFpTy;

  /// Integer type used to hold the exponents returned by frexp.
  Type *ExTy;

  /// How many bits of the quotient to compute per iteration of the
  /// algorithm, stored as a value of type \p ExTy.
  Value *Bits;

  /// Constant 1 of type \p ExTy.
  Value *One;

  /// The frem argument/return types that can be expanded by this class.
  // TODO: The expansion could work for other floating point types
  // as well, but this would require additional testing.
  static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
                                                      MVT::f64};

public:
  /// Return true if \p Ty is one of the scalar fp types this class can expand.
  static bool canExpandType(Type *Ty) {
    EVT VT = EVT::getEVT(Ty);
    assert(VT.isSimple() && "Can expand only simple types");

    return is_contained(Range: ExpandableTypes, Element: VT.getSimpleVT());
  }

  /// Return true if the target requests "Expand" legalization for frem on the
  /// scalar type \p VT.
  static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
    assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
    return TLI.getOperationAction(Op: ISD::FREM, VT) ==
           TargetLowering::LegalizeAction::Expand;
  }

  /// Overload taking an IR type; decides based on the scalar element type.
  static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
    // Consider scalar type for simplicity. It seems unlikely that a
    // vector type can be legalized without expansion if the scalar
    // type cannot.
    return shouldExpandFremType(TLI, VT: EVT::getEVT(Ty: Ty->getScalarType()));
  }

  /// Return true if the pass should expand frem instructions of any type
  /// for the target represented by \p TLI.
  static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
    return any_of(Range: ExpandableTypes,
                  P: [&](MVT V) { return shouldExpandFremType(TLI, VT: EVT(V)); });
  }

  /// Create an expander configured for \p Ty. \p Ty must satisfy
  /// canExpandType().
  static FRemExpander create(IRBuilder<> &B, Type *Ty) {
    assert(canExpandType(Ty) && "Expected supported floating point type");

    // The type to use for the computation of the remainder. This may be
    // wider than the input/result type which affects the ...
    Type *ComputeTy = Ty;
    // ... maximum number of iterations of the remainder computation loop
    // to use. This value is for the case in which the computation
    // uses the same input/result type.
    unsigned MaxIter = 2;

    if (Ty->isHalfTy()) {
      // Use the wider type and fewer iterations.
      ComputeTy = B.getFloatTy();
      MaxIter = 1;
    }

    // Bits-per-iteration is derived from the type's significand precision.
    unsigned Precision =
        llvm::APFloat::semanticsPrecision(Ty->getFltSemantics());
    return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
  }

  /// Build the FRem expansion for the numerator \p X and the
  /// denominator \p Y. The type of X and Y must match \p FremTy. The
  /// code will be generated at the insertion point of \p B and the
  /// insertion point will be reset at exit.
  Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;

  /// Build an approximate FRem expansion for the numerator \p X and
  /// the denominator \p Y at the insertion point of builder \p B.
  /// The type of X and Y must match \p FremTy.
  Value *buildApproxFRem(Value *X, Value *Y) const;

private:
  /// \p Bits is the per-iteration quotient bit count; it is materialized
  /// as a constant of \p ExTy (i32).
  FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
      : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
        Bits(ConstantInt::get(Ty: ExTy, V: Bits)), One(ConstantInt::get(Ty: ExTy, V: 1)) {};

  /// Build 1.0 / \p V as a plain fdiv of type \p ComputeFpTy.
  Value *createRcp(Value *V, const Twine &Name) const {
    // Leave it to later optimizations to turn this into an rcp
    // instruction if available.
    return B.CreateFDiv(L: ConstantFP::get(Ty: ComputeFpTy, V: 1.0), R: V, Name);
  }

  // Helper function to build the UPDATE_AX code which is common to the
  // loop body and the "final iteration".
  Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
    // Build:
    // float q = rint(ax * ayinv);
    // ax = fma(-q, ay, ax);
    // int clt = ax < 0.0f;
    // float axp = ax + ay;
    // ax = clt ? axp : ax;
    Value *Q = B.CreateUnaryIntrinsic(ID: Intrinsic::rint, V: B.CreateFMul(L: Ax, R: Ayinv),
                                      FMFSource: {}, Name: "q");
    Value *AxUpdate = B.CreateFMA(Factor1: B.CreateFNeg(V: Q), Factor2: Ay, Summand: Ax, FMFSource: {}, Name: "ax");
    Value *Clt = B.CreateFCmp(P: CmpInst::FCMP_OLT, LHS: AxUpdate,
                              RHS: ConstantFP::getZero(Ty: ComputeFpTy), Name: "clt");
    Value *Axp = B.CreateFAdd(L: AxUpdate, R: Ay, Name: "axp");
    return B.CreateSelect(C: Clt, True: Axp, False: AxUpdate, Name: "ax");
  }

  /// Build code to extract the exponent and mantissa of \p Src.
  /// Return the exponent minus one for use as a loop bound and
  /// the mantissa taken to the given \p NewExp power.
  std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
                                               const Twine &ExName,
                                               const Twine &PowName) const {
    // Build:
    // ExName = frexp_exp(Src) - 1;
    // PowName = fldexp(frexp_mant(ExName), NewExp);
    Type *Ty = Src->getType();
    Type *ExTy = B.getInt32Ty();
    Value *Frexp = B.CreateIntrinsic(ID: Intrinsic::frexp, Types: {Ty, ExTy}, Args: Src);
    Value *Mant = B.CreateExtractValue(Agg: Frexp, Idxs: {0});
    Value *Exp = B.CreateExtractValue(Agg: Frexp, Idxs: {1});

    Exp = B.CreateSub(LHS: Exp, RHS: One, Name: ExName);
    Value *Pow = B.CreateLdexp(Src: Mant, Exp: NewExp, FMFSource: {}, Name: PowName);

    return {Pow, Exp};
  }

  /// Build the main computation of the remainder for the case in which
  /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the
  /// denominator. Add the incoming edge from the computation result
  /// to \p RetPhi.
  void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
                                 PHINode *RetPhi, FastMathFlags FMF) const {
    // Install the caller-provided flags for the duration of this expansion.
    IRBuilder<>::FastMathFlagGuard Guard(B);
    B.setFastMathFlags(FMF);

    // Build:
    // ex = frexp_exp(ax) - 1;
    // ax = fldexp(frexp_mant(ax), bits);
    // ey = frexp_exp(ay) - 1;
    // ay = fldexp(frexp_mant(ay), 1);
    auto [Ax, Ex] = buildExpAndPower(Src: AxInitial, NewExp: Bits, ExName: "ex", PowName: "ax");
    auto [Ay, Ey] = buildExpAndPower(Src: AyInitial, NewExp: One, ExName: "ey", PowName: "ay");

    // Build:
    // int nb = ex - ey;
    // float ayinv = 1.0/ay;
    Value *Nb = B.CreateSub(LHS: Ex, RHS: Ey, Name: "nb");
    Value *Ayinv = createRcp(V: Ay, Name: "ayinv");

    // Build: while (nb > bits)
    BasicBlock *PreheaderBB = B.GetInsertBlock();
    Function *Fun = PreheaderBB->getParent();
    auto *LoopBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.loop_body", Parent: Fun);
    auto *ExitBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.loop_exit", Parent: Fun);

    B.CreateCondBr(Cond: B.CreateICmp(P: CmpInst::ICMP_SGT, LHS: Nb, RHS: Bits), True: LoopBB, False: ExitBB);

    // Build loop body:
    // UPDATE_AX
    // ax = fldexp(ax, bits);
    // nb -= bits;
    // One iteration of the loop is factored out. The code shared by
    // the loop and this "iteration" is denoted by UPDATE_AX.
    B.SetInsertPoint(LoopBB);
    PHINode *NbIv = B.CreatePHI(Ty: Nb->getType(), NumReservedValues: 2, Name: "nb_iv");
    NbIv->addIncoming(V: Nb, BB: PreheaderBB);

    auto *AxPhi = B.CreatePHI(Ty: ComputeFpTy, NumReservedValues: 2, Name: "ax_loop_phi");
    AxPhi->addIncoming(V: Ax, BB: PreheaderBB);

    Value *AxPhiUpdate = buildUpdateAx(Ax: AxPhi, Ay, Ayinv);
    AxPhiUpdate = B.CreateLdexp(Src: AxPhiUpdate, Exp: Bits, FMFSource: {}, Name: "ax_update");
    AxPhi->addIncoming(V: AxPhiUpdate, BB: LoopBB);
    NbIv->addIncoming(V: B.CreateSub(LHS: NbIv, RHS: Bits, Name: "nb_update"), BB: LoopBB);

    B.CreateCondBr(Cond: B.CreateICmp(P: CmpInst::ICMP_SGT, LHS: NbIv, RHS: Bits), True: LoopBB, False: ExitBB);

    // Build final iteration
    // ax = fldexp(ax, nb - bits + 1);
    // UPDATE_AX
    B.SetInsertPoint(ExitBB);

    // The exit block can be reached directly from the preheader (loop not
    // taken) or from the loop, hence the two-way phis.
    auto *AxPhiExit = B.CreatePHI(Ty: ComputeFpTy, NumReservedValues: 2, Name: "ax_exit_phi");
    AxPhiExit->addIncoming(V: Ax, BB: PreheaderBB);
    AxPhiExit->addIncoming(V: AxPhi, BB: LoopBB);
    auto *NbExitPhi = B.CreatePHI(Ty: Nb->getType(), NumReservedValues: 2, Name: "nb_exit_phi");
    NbExitPhi->addIncoming(V: NbIv, BB: LoopBB);
    NbExitPhi->addIncoming(V: Nb, BB: PreheaderBB);

    Value *AxFinal = B.CreateLdexp(
        Src: AxPhiExit, Exp: B.CreateAdd(LHS: B.CreateSub(LHS: NbExitPhi, RHS: Bits), RHS: One), FMFSource: {}, Name: "ax");
    AxFinal = buildUpdateAx(Ax: AxFinal, Ay, Ayinv);

    // Build:
    // ax = fldexp(ax, ey);
    // ret = copysign(ax,x);
    AxFinal = B.CreateLdexp(Src: AxFinal, Exp: Ey, FMFSource: {}, Name: "ax");
    // Narrow back to the frem type if the computation used a wider type.
    if (ComputeFpTy != FremTy)
      AxFinal = B.CreateFPTrunc(V: AxFinal, DestTy: FremTy);
    Value *Ret = B.CreateCopySign(LHS: AxFinal, RHS: X);

    RetPhi->addIncoming(V: Ret, BB: ExitBB);
  }

  /// Build the else-branch of the conditional in the FRem
  /// expansion, i.e. the case in which Ax <= Ay, where Ax = |X|, Ay
  /// = |Y|, and X is the numerator and Y the denominator. Add the
  /// incoming edge from the result to \p RetPhi.
  void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
    // Build:
    // ret = ax == ay ? copysign(0.0f, x) : x;
    Value *ZeroWithXSign = B.CreateCopySign(LHS: ConstantFP::getZero(Ty: FremTy), RHS: X);
    Value *Ret = B.CreateSelect(C: B.CreateFCmpOEQ(LHS: Ax, RHS: Ay), True: ZeroWithXSign, False: X);

    RetPhi->addIncoming(V: Ret, BB: B.GetInsertBlock());
  }

  /// Return a value that is NaN if one of the corner cases concerning
  /// the inputs \p X and \p Y is detected, and \p Ret otherwise.
  Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,
                                std::optional<SimplifyQuery> &SQ,
                                bool NoInfs) const {
    // Build:
    // ret = (y == 0.0f || isnan(y)) ? QNAN : ret;
    // ret = isfinite(x) ? ret : QNAN;
    // (The unordered compare against zero covers both y == 0 and y NaN.)
    Value *Nan = ConstantFP::getQNaN(Ty: FremTy);
    Ret = B.CreateSelect(C: B.CreateFCmpUEQ(LHS: Y, RHS: ConstantFP::getZero(Ty: FremTy)), True: Nan,
                         False: Ret);
    // Skip the finiteness check if infinities are excluded by flags or
    // provable via value tracking.
    Value *XFinite =
        NoInfs || (SQ && isKnownNeverInfinity(V: X, SQ: *SQ))
            ? B.getTrue()
            : B.CreateFCmpULT(LHS: B.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X),
                              RHS: ConstantFP::getInfinity(Ty: FremTy));
    Ret = B.CreateSelect(C: XFinite, True: Ret, False: Nan);

    return Ret;
  }
};
439
440Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {
441 IRBuilder<>::FastMathFlagGuard Guard(B);
442 // Propagating the approximate functions flag to the
443 // division leads to an unacceptable drop in precision
444 // on AMDGPU.
445 // TODO Find out if any flags might be worth propagating.
446 B.clearFastMathFlags();
447
448 Value *Quot = B.CreateFDiv(L: X, R: Y);
449 Value *Trunc = B.CreateUnaryIntrinsic(ID: Intrinsic::trunc, V: Quot, FMFSource: {});
450 Value *Neg = B.CreateFNeg(V: Trunc);
451
452 return B.CreateFMA(Factor1: Neg, Factor2: Y, Summand: X);
453}
454
455Value *FRemExpander::buildFRem(Value *X, Value *Y,
456 std::optional<SimplifyQuery> &SQ) const {
457 assert(X->getType() == FremTy && Y->getType() == FremTy);
458
459 FastMathFlags FMF = B.getFastMathFlags();
460
461 // This function generates the following code structure:
462 // if (abs(x) > abs(y))
463 // { ret = compute remainder }
464 // else
465 // { ret = x or 0 with sign of x }
466 // Adjust ret to NaN/inf in input
467 // return ret
468 Value *Ax = B.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: {}, Name: "ax");
469 Value *Ay = B.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Y, FMFSource: {}, Name: "ay");
470 if (ComputeFpTy != X->getType()) {
471 Ax = B.CreateFPExt(V: Ax, DestTy: ComputeFpTy, Name: "ax");
472 Ay = B.CreateFPExt(V: Ay, DestTy: ComputeFpTy, Name: "ay");
473 }
474 Value *AxAyCmp = B.CreateFCmpOGT(LHS: Ax, RHS: Ay);
475
476 PHINode *RetPhi = B.CreatePHI(Ty: FremTy, NumReservedValues: 2, Name: "ret");
477 Value *Ret = RetPhi;
478
479 // We would return NaN in all corner cases handled here.
480 // Hence, if NaNs are excluded, keep the result as it is.
481 if (!FMF.noNaNs())
482 Ret = handleInputCornerCases(Ret, X, Y, SQ, NoInfs: FMF.noInfs());
483
484 Function *Fun = B.GetInsertBlock()->getParent();
485 auto *ThenBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.compute", Parent: Fun);
486 auto *ElseBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.else", Parent: Fun);
487 SplitBlockAndInsertIfThenElse(Cond: AxAyCmp, SplitBefore: RetPhi, ThenBlock: &ThenBB, ElseBlock: &ElseBB);
488
489 auto SavedInsertPt = B.GetInsertPoint();
490
491 // Build remainder computation for "then" branch
492 //
493 // The ordered comparison ensures that ax and ay are not NaNs
494 // in the then-branch. Furthermore, y cannot be an infinity and the
495 // check at the end of the function ensures that the result will not
496 // be used if x is an infinity.
497 FastMathFlags ComputeFMF = FMF;
498 ComputeFMF.setNoInfs();
499 ComputeFMF.setNoNaNs();
500
501 B.SetInsertPoint(ThenBB);
502 buildRemainderComputation(AxInitial: Ax, AyInitial: Ay, X, RetPhi, FMF);
503 B.CreateBr(Dest: RetPhi->getParent());
504
505 // Build "else"-branch
506 B.SetInsertPoint(ElseBB);
507 buildElseBranch(Ax, Ay, X, RetPhi);
508 B.CreateBr(Dest: RetPhi->getParent());
509
510 B.SetInsertPoint(SavedInsertPt);
511
512 return Ret;
513}
514} // namespace
515
516static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
517 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
518
519 Type *Ty = I.getType();
520 assert(FRemExpander::canExpandType(Ty) &&
521 "Expected supported floating point type");
522
523 FastMathFlags FMF = I.getFastMathFlags();
524 // TODO Make use of those flags for optimization?
525 FMF.setAllowReciprocal(false);
526 FMF.setAllowContract(false);
527
528 IRBuilder<> B(&I);
529 B.setFastMathFlags(FMF);
530 B.SetCurrentDebugLocation(I.getDebugLoc());
531
532 const FRemExpander Expander = FRemExpander::create(B, Ty);
533 Value *Ret = FMF.approxFunc()
534 ? Expander.buildApproxFRem(X: I.getOperand(i_nocapture: 0), Y: I.getOperand(i_nocapture: 1))
535 : Expander.buildFRem(X: I.getOperand(i_nocapture: 0), Y: I.getOperand(i_nocapture: 1), SQ);
536
537 I.replaceAllUsesWith(V: Ret);
538 Ret->takeName(V: &I);
539 I.eraseFromParent();
540
541 return true;
542}
543// clang-format off: preserve formatting of the following example
544
545/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
546/// the generated code. This currently generates code similarly to compiler-rt's
547/// implementations.
548///
549/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
550/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
551/// entry:
552/// %0 = bitcast float %a to i32
553/// %conv.i = zext i32 %0 to i64
554/// %tobool.not = icmp sgt i32 %0, -1
555/// %conv = select i1 %tobool.not, i64 1, i64 -1
556/// %and = lshr i64 %conv.i, 23
557/// %shr = and i64 %and, 255
558/// %and2 = and i64 %conv.i, 8388607
559/// %or = or i64 %and2, 8388608
560/// %cmp = icmp ult i64 %shr, 127
561/// br i1 %cmp, label %cleanup, label %if.end
562///
563/// if.end: ; preds = %entry
564/// %sub = add nuw nsw i64 %shr, 4294967169
565/// %conv5 = and i64 %sub, 4294967232
566/// %cmp6.not = icmp eq i64 %conv5, 0
567/// br i1 %cmp6.not, label %if.end12, label %if.then8
568///
569/// if.then8: ; preds = %if.end
/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
/// br label %cleanup
572///
573/// if.end12: ; preds = %if.end
574/// %cmp13 = icmp ult i64 %shr, 150
575/// br i1 %cmp13, label %if.then15, label %if.else
576///
577/// if.then15: ; preds = %if.end12
578/// %sub16 = sub nuw nsw i64 150, %shr
579/// %shr17 = lshr i64 %or, %sub16
580/// %mul = mul nsw i64 %shr17, %conv
581/// br label %cleanup
582///
583/// if.else: ; preds = %if.end12
584/// %sub18 = add nsw i64 %shr, -150
585/// %shl = shl i64 %or, %sub18
586/// %mul19 = mul nsw i64 %shl, %conv
587/// br label %cleanup
588///
/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ],
///             [ %mul19, %if.else ], [ 0, %entry ]
/// ret i64 %retval.0
593/// }
594///
595/// Replace fp to integer with generated code.
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned) {
  // clang-format on
  IRBuilder<> Builder(FPToI);
  auto *FloatVal = FPToI->getOperand(i: 0);
  IntegerType *IntTy = cast<IntegerType>(Val: FPToI->getType());

  unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
  // getFPMantissaWidth() counts the implicit integer bit; subtract one to get
  // the number of explicit significand bits (e.g. 23 for float, matching the
  // lshr by 23 in the example above).
  unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;

  // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
  // to i32 first following a sext/zext to target integer type.
  Value *A1 = nullptr;
  if (FloatVal->getType()->isHalfTy() && BitWidth >= 32) {
    if (FPToI->getOpcode() == Instruction::FPToUI) {
      Value *A0 = Builder.CreateFPToUI(V: FloatVal, DestTy: Builder.getInt32Ty());
      A1 = Builder.CreateZExt(V: A0, DestTy: IntTy);
    } else { // FPToSI
      Value *A0 = Builder.CreateFPToSI(V: FloatVal, DestTy: Builder.getInt32Ty());
      A1 = Builder.CreateSExt(V: A0, DestTy: IntTy);
    }
    FPToI->replaceAllUsesWith(V: A1);
    FPToI->dropAllReferences();
    FPToI->eraseFromParent();
    return;
  }

  // fp80 conversion is implemented by fpext to fp128 first then do the
  // conversion. (fp80 has a 64-bit significand, hence the check against 63;
  // 112 is fp128's explicit significand width.)
  FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
  unsigned FloatWidth =
      PowerOf2Ceil(A: FloatVal->getType()->getScalarSizeInBits());
  unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
  unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
  IntegerType *FloatIntTy = Builder.getIntNTy(N: FloatWidth);
  Value *ImplicitBit = ConstantInt::get(
      Ty: FloatIntTy, V: APInt::getOneBitSet(numBits: FloatWidth, BitNo: FPMantissaWidth));
  Value *SignificandMask = ConstantInt::get(
      Ty: FloatIntTy, V: APInt::getLowBitsSet(numBits: FloatWidth, loBitsSet: FPMantissaWidth));

  BasicBlock *Entry = Builder.GetInsertBlock();
  Function *F = Entry->getParent();
  Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
  // These two blocks are created (and later referenced) only when
  // IsSaturating is set.
  BasicBlock *CheckSaturateBB, *SaturateBB;
  BasicBlock *End =
      Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "fp-to-i-cleanup");
  if (IsSaturating) {
    CheckSaturateBB = BasicBlock::Create(Context&: Builder.getContext(),
                                         Name: "fp-to-i-if-check.saturate", Parent: F, InsertBefore: End);
    SaturateBB =
        BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-saturate", Parent: F, InsertBefore: End);
  }
  BasicBlock *CheckExpSizeBB = BasicBlock::Create(
      Context&: Builder.getContext(), Name: "fp-to-i-if-check.exp.size", Parent: F, InsertBefore: End);
  BasicBlock *ExpSmallBB =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-exp.small", Parent: F, InsertBefore: End);
  BasicBlock *ExpLargeBB =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-exp.large", Parent: F, InsertBefore: End);

  Entry->getTerminator()->eraseFromParent();

  // entry:
  Builder.SetInsertPoint(Entry);
  // We're going to introduce branches on the value, so freeze it.
  if (!isGuaranteedNotToBeUndefOrPoison(V: FloatVal))
    FloatVal = Builder.CreateFreeze(V: FloatVal);
  // fp80 conversion is implemented by fpext to fp128 first then do the
  // conversion.
  if (FloatVal->getType()->isX86_FP80Ty())
    FloatVal =
        Builder.CreateFPExt(V: FloatVal, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
  Value *ARep = Builder.CreateBitCast(V: FloatVal, DestTy: FloatIntTy);
  // PosOrNeg/Sign are only computed (and only used below) on signed paths.
  Value *PosOrNeg, *Sign;
  if (IsSigned) {
    PosOrNeg =
        Builder.CreateICmpSGT(LHS: ARep, RHS: ConstantInt::getSigned(Ty: FloatIntTy, V: -1));
    Sign = Builder.CreateSelect(C: PosOrNeg, True: ConstantInt::getSigned(Ty: IntTy, V: 1),
                                False: ConstantInt::getSigned(Ty: IntTy, V: -1), Name: "sign");
  }
  Value *And =
      Builder.CreateLShr(LHS: ARep, RHS: Builder.getIntN(N: FloatWidth, C: FPMantissaWidth));
  Value *BiasedExp = Builder.CreateAnd(
      LHS: And, RHS: Builder.getIntN(N: FloatWidth, C: (1 << ExponentWidth) - 1), Name: "biased.exp");
  Value *Abs = Builder.CreateAnd(LHS: ARep, RHS: SignificandMask);
  Value *Significand = Builder.CreateOr(LHS: Abs, RHS: ImplicitBit, Name: "significand");
  // A biased exponent below the bias means |input| < 1, which truncates to 0
  // (the phi in the cleanup block contributes 0 for the entry edge).
  Value *ZeroResultCond = Builder.CreateICmpULT(
      LHS: BiasedExp, RHS: Builder.getIntN(N: FloatWidth, C: ExponentBias), Name: "exp.is.negative");
  if (IsSaturating) {
    // Saturating conversions yield 0 for NaN inputs, and for negative inputs
    // of an unsigned conversion.
    Value *IsNaN = Builder.CreateFCmpUNO(LHS: FloatVal, RHS: FloatVal, Name: "is.nan");
    ZeroResultCond = Builder.CreateOr(LHS: ZeroResultCond, RHS: IsNaN);
    if (!IsSigned) {
      Value *IsNeg = Builder.CreateIsNeg(Arg: ARep);
      ZeroResultCond = Builder.CreateOr(LHS: ZeroResultCond, RHS: IsNeg);
    }
  }
  Builder.CreateCondBr(Cond: ZeroResultCond, True: End,
                       False: IsSaturating ? CheckSaturateBB : CheckExpSizeBB);

  Value *Saturated;
  if (IsSaturating) {
    // check.saturate: exponent large enough that the value overflows the
    // target integer type -> produce the saturated min/max.
    Builder.SetInsertPoint(CheckSaturateBB);
    Value *Cmp3 = Builder.CreateICmpUGE(
        LHS: BiasedExp, RHS: ConstantInt::getSigned(
                       Ty: FloatIntTy, V: static_cast<int64_t>(ExponentBias +
                                                          BitWidth - IsSigned)));
    Builder.CreateCondBr(Cond: Cmp3, True: SaturateBB, False: CheckExpSizeBB);

    // saturate:
    Builder.SetInsertPoint(SaturateBB);
    if (IsSigned) {
      Value *SignedMax =
          ConstantInt::get(Ty: IntTy, V: APInt::getSignedMaxValue(numBits: BitWidth));
      Value *SignedMin =
          ConstantInt::get(Ty: IntTy, V: APInt::getSignedMinValue(numBits: BitWidth));
      Saturated =
          Builder.CreateSelect(C: PosOrNeg, True: SignedMax, False: SignedMin, Name: "saturated");
    } else {
      Saturated = ConstantInt::getAllOnesValue(Ty: IntTy);
    }
    Builder.CreateBr(Dest: End);
  }

  // if.end9:
  Builder.SetInsertPoint(CheckExpSizeBB);
  Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
      LHS: BiasedExp, RHS: Builder.getIntN(N: FloatWidth, C: ExponentBias + FPMantissaWidth),
      Name: "exp.smaller.mantissa.width");
  Builder.CreateCondBr(Cond: ExpSmallerMantissaWidth, True: ExpSmallBB, False: ExpLargeBB);

  // exp.small: exponent below the mantissa width, so the result is the
  // significand shifted right.
  Builder.SetInsertPoint(ExpSmallBB);
  Value *Sub13 = Builder.CreateSub(
      LHS: Builder.getIntN(N: FloatWidth, C: ExponentBias + FPMantissaWidth), RHS: BiasedExp);
  Value *ExpSmallRes =
      Builder.CreateZExtOrTrunc(V: Builder.CreateLShr(LHS: Significand, RHS: Sub13), DestTy: IntTy);
  if (IsSigned)
    ExpSmallRes = Builder.CreateMul(LHS: ExpSmallRes, RHS: Sign);
  Builder.CreateBr(Dest: End);

  // exp.large: exponent at or above the mantissa width, so the result is the
  // significand shifted left.
  Builder.SetInsertPoint(ExpLargeBB);
  Value *Sub15 = Builder.CreateAdd(
      LHS: BiasedExp,
      RHS: ConstantInt::getSigned(
          Ty: FloatIntTy, V: -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
  Value *SignificandCast = Builder.CreateZExtOrTrunc(V: Significand, DestTy: IntTy);
  Value *ExpLargeRes = Builder.CreateShl(
      LHS: SignificandCast, RHS: Builder.CreateZExtOrTrunc(V: Sub15, DestTy: IntTy));
  if (IsSigned)
    ExpLargeRes = Builder.CreateMul(LHS: ExpLargeRes, RHS: Sign);
  Builder.CreateBr(Dest: End);

  // cleanup: merge all result paths (plus 0 from the entry edge).
  Builder.SetInsertPoint(TheBB: End, IP: End->begin());
  PHINode *Retval0 = Builder.CreatePHI(Ty: FPToI->getType(), NumReservedValues: 3 + IsSaturating);

  if (IsSaturating)
    Retval0->addIncoming(V: Saturated, BB: SaturateBB);
  Retval0->addIncoming(V: ExpSmallRes, BB: ExpSmallBB);
  Retval0->addIncoming(V: ExpLargeRes, BB: ExpLargeBB);
  Retval0->addIncoming(V: Builder.getIntN(N: BitWidth, C: 0), BB: Entry);

  FPToI->replaceAllUsesWith(V: Retval0);
  FPToI->dropAllReferences();
  FPToI->eraseFromParent();
}
762
763// clang-format off: preserve formatting of the following example
764
/// Generate code to convert an integer to a fp number, replacing S(U)IToFP
/// with the generated code. This currently generates code similarly to
/// compiler-rt's implementations. This implementation has an implicit
/// assumption that the integer width is not smaller than the fp width.
769///
770/// An example IR generated from compiler-rt/floatdisf.c looks like below:
771/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
772/// entry:
773/// %cmp = icmp eq i64 %a, 0
774/// br i1 %cmp, label %return, label %if.end
775///
776/// if.end: ; preds = %entry
777/// %shr = ashr i64 %a, 63
778/// %xor = xor i64 %shr, %a
779/// %sub = sub nsw i64 %xor, %shr
780/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
781/// %cast = trunc i64 %0 to i32
782/// %sub1 = sub nuw nsw i32 64, %cast
783/// %sub2 = xor i32 %cast, 63
784/// %cmp3 = icmp ult i32 %cast, 40
785/// br i1 %cmp3, label %if.then4, label %if.else
786///
787/// if.then4: ; preds = %if.end
788/// switch i32 %sub1, label %sw.default [
789/// i32 25, label %sw.bb
790/// i32 26, label %sw.epilog
791/// ]
792///
793/// sw.bb: ; preds = %if.then4
794/// %shl = shl i64 %sub, 1
795/// br label %sw.epilog
796///
797/// sw.default: ; preds = %if.then4
798/// %sub5 = sub nsw i64 38, %0
799/// %sh_prom = and i64 %sub5, 4294967295
800/// %shr6 = lshr i64 %sub, %sh_prom
801/// %shr9 = lshr i64 274877906943, %0
802/// %and = and i64 %shr9, %sub
803/// %cmp10 = icmp ne i64 %and, 0
804/// %conv11 = zext i1 %cmp10 to i64
805/// %or = or i64 %shr6, %conv11
806/// br label %sw.epilog
807///
/// sw.epilog:              ; preds = %sw.default, %if.then4, %sw.bb
///   %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ],
///                       [ %shl, %sw.bb ]
///   %1 = lshr i64 %a.addr.0, 2
///   %2 = and i64 %1, 1
///   %or16 = or i64 %2, %a.addr.0
///   %inc = add nsw i64 %or16, 1
///   %3 = and i64 %inc, 67108864
///   %tobool.not = icmp eq i64 %3, 0
///   %spec.select.v = select i1 %tobool.not, i64 2, i64 3
///   %spec.select = ashr i64 %inc, %spec.select.v
///   %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
///   br label %if.end26
818///
819/// if.else: ; preds = %if.end
820/// %sub23 = add nuw nsw i64 %0, 4294967256
821/// %sh_prom24 = and i64 %sub23, 4294967295
822/// %shl25 = shl i64 %sub, %sh_prom24
823/// br label %if.end26
824///
825/// if.end26: ; preds = %sw.epilog,
826/// %if.else
827/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
828/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
829/// %conv27 = trunc i64 %shr to i32
830/// %and28 = and i32 %conv27, -2147483648
831/// %add = shl nuw nsw i32 %e.0, 23
832/// %shl29 = add nuw nsw i32 %add, 1065353216
833/// %conv31 = trunc i64 %a.addr.1 to i32
834/// %and32 = and i32 %conv31, 8388607
835/// %or30 = or i32 %and32, %and28
836/// %or33 = or i32 %or30, %shl29
837/// %4 = bitcast i32 %or33 to float
838/// br label %return
839///
840/// return: ; preds = %entry,
841/// %if.end26
842/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
843/// ret float %retval.0
844/// }
845///
846/// Replace integer to fp with generated code.
static void expandIToFP(Instruction *IToFP) {
  // clang-format on
  IRBuilder<> Builder(IToFP);
  auto *IntVal = IToFP->getOperand(i: 0);
  IntegerType *IntTy = cast<IntegerType>(Val: IntVal->getType());

  unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
  unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
  // fp80 conversion is implemented by conversion to fp128 first followed by
  // a fptrunc to fp80.
  FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
  // FIXME: As there are no related builtins added in compiler-rt,
  // we currently use the fp32 <-> fp16 lib calls to implement half/bfloat.
  FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
  FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
  unsigned FloatWidth = PowerOf2Ceil(A: FPMantissaWidth);
  bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;

  // We're going to introduce branches on the value, so freeze it.
  if (!isGuaranteedNotToBeUndefOrPoison(V: IntVal))
    IntVal = Builder.CreateFreeze(V: IntVal);

  // The expansion below assumes that int width >= float width. Zero or sign
  // extend the integer accordingly.
  if (BitWidth < FloatWidth) {
    BitWidth = FloatWidth;
    IntTy = Builder.getIntNTy(N: BitWidth);
    IntVal = Builder.CreateIntCast(V: IntVal, DestTy: IntTy, isSigned: IsSigned);
  }

  // Single bit set at position FPMantissaWidth + 3; used below (see A3) to
  // test whether the rounding increment carried into that bit.
  Value *Temp1 =
      Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: 1),
                        RHS: Builder.getIntN(N: BitWidth, C: FPMantissaWidth + 3));

  // Create the control-flow skeleton; block names mirror the compiler-rt
  // derived example in the comment above.
  BasicBlock *Entry = Builder.GetInsertBlock();
  Function *F = Entry->getParent();
  Entry->setName(Twine(Entry->getName(), "itofp-entry"));
  BasicBlock *End =
      Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "itofp-return");
  BasicBlock *IfEnd =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end", Parent: F, InsertBefore: End);
  BasicBlock *IfThen4 =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then4", Parent: F, InsertBefore: End);
  BasicBlock *SwBB =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-bb", Parent: F, InsertBefore: End);
  BasicBlock *SwDefault =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-default", Parent: F, InsertBefore: End);
  BasicBlock *SwEpilog =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-epilog", Parent: F, InsertBefore: End);
  BasicBlock *IfThen20 =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then20", Parent: F, InsertBefore: End);
  BasicBlock *IfElse =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-else", Parent: F, InsertBefore: End);
  BasicBlock *IfEnd26 =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end26", Parent: F, InsertBefore: End);

  // Drop the unconditional branch created by splitBasicBlock; the entry block
  // receives a conditional branch below.
  Entry->getTerminator()->eraseFromParent();

  Function *CTLZ =
      Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: Intrinsic::ctlz, Tys: IntTy);
  ConstantInt *True = Builder.getTrue();

  // entry: zero converts to +0.0, so branch straight to the return block.
  Builder.SetInsertPoint(Entry);
  Value *Cmp = Builder.CreateICmpEQ(LHS: IntVal, RHS: ConstantInt::getSigned(Ty: IntTy, V: 0));
  Builder.CreateCondBr(Cond: Cmp, True: End, False: IfEnd);

  // if.end: compute |IntVal| (for signed inputs) and its leading-zero count.
  Builder.SetInsertPoint(IfEnd);
  Value *Shr =
      Builder.CreateAShr(LHS: IntVal, RHS: Builder.getIntN(N: BitWidth, C: BitWidth - 1));
  Value *Xor = Builder.CreateXor(LHS: Shr, RHS: IntVal);
  Value *Sub = Builder.CreateSub(LHS: Xor, RHS: Shr);
  Value *Call = Builder.CreateCall(Callee: CTLZ, Args: {IsSigned ? Sub : IntVal, True});
  Value *Cast = Builder.CreateTrunc(V: Call, DestTy: Builder.getInt32Ty());
  // For fp128 the exponent math stays in the wide integer type; otherwise it
  // is done in i32.
  int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
  // Sub1 = number of significant bits; Sub2 = Sub1 - 1 (unbiased exponent).
  Value *Sub1 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth),
                                  RHS: FloatWidth == 128 ? Call : Cast);
  Value *Sub2 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - 1),
                                  RHS: FloatWidth == 128 ? Call : Cast);
  Value *Cmp3 = Builder.CreateICmpSGT(
      LHS: Sub1, RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 1));
  Builder.CreateCondBr(Cond: Cmp3, True: IfThen4, False: IfElse);

  // if.then4: reached when there are more significant bits (Sub1) than
  // FPMantissaWidth + 1, i.e. rounding is needed; dispatch on Sub1.
  Builder.SetInsertPoint(IfThen4);
  llvm::SwitchInst *SI = Builder.CreateSwitch(V: Sub1, Dest: SwDefault);
  SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 2), Dest: SwBB);
  SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 3), Dest: SwEpilog);

  // sw.bb:
  Builder.SetInsertPoint(SwBB);
  Value *Shl =
      Builder.CreateShl(LHS: IsSigned ? Sub : IntVal, RHS: Builder.getIntN(N: BitWidth, C: 1));
  Builder.CreateBr(Dest: SwEpilog);

  // sw.default: shift right and OR in a sticky bit for the discarded bits.
  Builder.SetInsertPoint(SwDefault);
  Value *Sub5 = Builder.CreateSub(
      LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - FPMantissaWidth - 3),
      RHS: FloatWidth == 128 ? Call : Cast);
  Value *ShProm = Builder.CreateZExt(V: Sub5, DestTy: IntTy);
  Value *Shr6 = Builder.CreateLShr(LHS: IsSigned ? Sub : IntVal,
                                   RHS: FloatWidth == 128 ? Sub5 : ShProm);
  Value *Sub8 =
      Builder.CreateAdd(LHS: FloatWidth == 128 ? Call : Cast,
                        RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 3));
  Value *ShProm9 = Builder.CreateZExt(V: Sub8, DestTy: IntTy);
  Value *Shr9 = Builder.CreateLShr(LHS: ConstantInt::getSigned(Ty: IntTy, V: -1),
                                   RHS: FloatWidth == 128 ? Sub8 : ShProm9);
  Value *And = Builder.CreateAnd(LHS: Shr9, RHS: IsSigned ? Sub : IntVal);
  Value *Cmp10 = Builder.CreateICmpNE(LHS: And, RHS: Builder.getIntN(N: BitWidth, C: 0));
  Value *Conv11 = Builder.CreateZExt(V: Cmp10, DestTy: IntTy);
  Value *Or = Builder.CreateOr(LHS: Shr6, RHS: Conv11);
  Builder.CreateBr(Dest: SwEpilog);

  // sw.epilog: apply the compiler-rt style rounding sequence (see the
  // floatdisf example above) to the normalized significand.
  Builder.SetInsertPoint(SwEpilog);
  PHINode *AAddr0 = Builder.CreatePHI(Ty: IntTy, NumReservedValues: 3);
  AAddr0->addIncoming(V: Or, BB: SwDefault);
  AAddr0->addIncoming(V: IsSigned ? Sub : IntVal, BB: IfThen4);
  AAddr0->addIncoming(V: Shl, BB: SwBB);
  Value *A0 = Builder.CreateTrunc(V: AAddr0, DestTy: Builder.getInt32Ty());
  Value *A1 = Builder.CreateLShr(LHS: A0, RHS: Builder.getInt32(C: 2));
  Value *A2 = Builder.CreateAnd(LHS: A1, RHS: Builder.getInt32(C: 1));
  Value *Conv16 = Builder.CreateZExt(V: A2, DestTy: IntTy);
  Value *Or17 = Builder.CreateOr(LHS: AAddr0, RHS: Conv16);
  Value *Inc = Builder.CreateAdd(LHS: Or17, RHS: Builder.getIntN(N: BitWidth, C: 1));
  Value *Shr18 = nullptr;
  if (IsSigned)
    Shr18 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 2));
  else
    Shr18 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 2));
  // Did the rounding increment carry into bit FPMantissaWidth + 3?
  Value *A3 = Builder.CreateAnd(LHS: Inc, RHS: Temp1, Name: "a3");
  Value *PosOrNeg = Builder.CreateICmpEQ(LHS: A3, RHS: Builder.getIntN(N: BitWidth, C: 0));
  Value *ExtractT60 = Builder.CreateTrunc(V: Shr18, DestTy: Builder.getIntNTy(N: FloatWidth));
  Value *Extract63 = Builder.CreateLShr(LHS: Shr18, RHS: Builder.getIntN(N: BitWidth, C: 32));
  Value *ExtractT64 = nullptr;
  if (FloatWidth > 80)
    ExtractT64 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getInt64Ty());
  else
    ExtractT64 = Builder.CreateTrunc(V: Extract63, DestTy: Builder.getInt32Ty());
  Builder.CreateCondBr(Cond: PosOrNeg, True: IfEnd26, False: IfThen20);

  // if.then20: the carry happened, so shift one bit further.
  Builder.SetInsertPoint(IfThen20);
  Value *Shr21 = nullptr;
  if (IsSigned)
    Shr21 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 3));
  else
    Shr21 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 3));
  Value *ExtractT = Builder.CreateTrunc(V: Shr21, DestTy: Builder.getIntNTy(N: FloatWidth));
  Value *Extract = Builder.CreateLShr(LHS: Shr21, RHS: Builder.getIntN(N: BitWidth, C: 32));
  Value *ExtractT62 = nullptr;
  if (FloatWidth > 80)
    ExtractT62 = Builder.CreateTrunc(V: Sub1, DestTy: Builder.getInt64Ty());
  else
    ExtractT62 = Builder.CreateTrunc(V: Extract, DestTy: Builder.getInt32Ty());
  Builder.CreateBr(Dest: IfEnd26);

  // if.else: all significant bits fit in the mantissa; no rounding, just
  // shift the value into position.
  Builder.SetInsertPoint(IfElse);
  Value *Sub24 = Builder.CreateAdd(
      LHS: FloatWidth == 128 ? Call : Cast,
      RHS: ConstantInt::getSigned(Ty: Builder.getIntNTy(N: BitWidthNew),
                              V: -(int)(BitWidth - FPMantissaWidth - 1)));
  Value *ShProm25 = Builder.CreateZExt(V: Sub24, DestTy: IntTy);
  Value *Shl26 = Builder.CreateShl(LHS: IsSigned ? Sub : IntVal,
                                   RHS: FloatWidth == 128 ? Sub24 : ShProm25);
  Value *ExtractT61 = Builder.CreateTrunc(V: Shl26, DestTy: Builder.getIntNTy(N: FloatWidth));
  Value *Extract65 = Builder.CreateLShr(LHS: Shl26, RHS: Builder.getIntN(N: BitWidth, C: 32));
  Value *ExtractT66 = nullptr;
  if (FloatWidth > 80)
    ExtractT66 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getInt64Ty());
  else
    ExtractT66 = Builder.CreateTrunc(V: Extract65, DestTy: Builder.getInt32Ty());
  Builder.CreateBr(Dest: IfEnd26);

  // if.end26: assemble sign, biased exponent, and mantissa into the final
  // bit pattern of the destination fp type.
  Builder.SetInsertPoint(IfEnd26);
  PHINode *AAddr1Off0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth), NumReservedValues: 3);
  AAddr1Off0->addIncoming(V: ExtractT, BB: IfThen20);
  AAddr1Off0->addIncoming(V: ExtractT60, BB: SwEpilog);
  AAddr1Off0->addIncoming(V: ExtractT61, BB: IfElse);
  PHINode *AAddr1Off32 = nullptr;
  if (FloatWidth > 32) {
    AAddr1Off32 =
        Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth > 80 ? 64 : 32), NumReservedValues: 3);
    AAddr1Off32->addIncoming(V: ExtractT62, BB: IfThen20);
    AAddr1Off32->addIncoming(V: ExtractT64, BB: SwEpilog);
    AAddr1Off32->addIncoming(V: ExtractT66, BB: IfElse);
  }
  PHINode *E0 = nullptr;
  if (FloatWidth <= 80) {
    E0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: BitWidthNew), NumReservedValues: 3);
    E0->addIncoming(V: Sub1, BB: IfThen20);
    E0->addIncoming(V: Sub2, BB: SwEpilog);
    E0->addIncoming(V: Sub2, BB: IfElse);
  }
  // Isolate the sign bit from the arithmetic-shift mask computed earlier.
  Value *And29 = nullptr;
  if (FloatWidth > 80) {
    Value *Temp2 = Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: 1),
                                     RHS: Builder.getIntN(N: BitWidth, C: 63));
    And29 = Builder.CreateAnd(LHS: Shr, RHS: Temp2, Name: "and29");
  } else {
    Value *Conv28 = Builder.CreateTrunc(V: Shr, DestTy: Builder.getInt32Ty());
    And29 = Builder.CreateAnd(
        LHS: Conv28, RHS: ConstantInt::get(Context&: Builder.getContext(), V: APInt::getSignMask(BitWidth: 32)));
  }
  unsigned TempMod = FPMantissaWidth % 32;
  Value *And34 = nullptr;
  Value *Shl30 = nullptr;
  if (FloatWidth > 80) {
    TempMod += 32;
    Value *Add = Builder.CreateShl(LHS: AAddr1Off32, RHS: Builder.getInt64(C: TempMod));
    Shl30 = Builder.CreateAdd(
        LHS: Add, RHS: Builder.getInt64(C: ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
    And34 = Builder.CreateZExt(V: Shl30, DestTy: Builder.getInt128Ty());
  } else {
    Value *Add = Builder.CreateShl(LHS: E0, RHS: Builder.getInt32(C: TempMod));
    Shl30 = Builder.CreateAdd(
        LHS: Add, RHS: Builder.getInt32(C: ((1 << (30 - TempMod)) - 1) << TempMod));
    And34 = Builder.CreateAnd(LHS: FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
                              RHS: Builder.getInt32(C: (1 << TempMod) - 1));
  }
  Value *Or35 = nullptr;
  if (FloatWidth > 80) {
    Value *And29Trunc = Builder.CreateTrunc(V: And29, DestTy: Builder.getInt128Ty());
    Value *Or31 = Builder.CreateOr(LHS: And29Trunc, RHS: And34);
    Value *Or34 = Builder.CreateShl(LHS: Or31, RHS: Builder.getIntN(N: 128, C: 64));
    Value *Temp3 = Builder.CreateShl(LHS: Builder.getIntN(N: 128, C: 1),
                                     RHS: Builder.getIntN(N: 128, C: FPMantissaWidth));
    Value *Temp4 = Builder.CreateSub(LHS: Temp3, RHS: Builder.getIntN(N: 128, C: 1));
    Value *A6 = Builder.CreateAnd(LHS: AAddr1Off0, RHS: Temp4);
    Or35 = Builder.CreateOr(LHS: Or34, RHS: A6);
  } else {
    Value *Or31 = Builder.CreateOr(LHS: And34, RHS: And29);
    Or35 = Builder.CreateOr(LHS: IsSigned ? Or31 : And34, RHS: Shl30);
  }
  // Bitcast the assembled bits (possibly via a wider fp type plus fptrunc)
  // to the destination fp type.
  Value *A4 = nullptr;
  if (IToFP->getType()->isDoubleTy()) {
    Value *ZExt1 = Builder.CreateZExt(V: Or35, DestTy: Builder.getIntNTy(N: FloatWidth));
    Value *Shl1 = Builder.CreateShl(LHS: ZExt1, RHS: Builder.getIntN(N: FloatWidth, C: 32));
    Value *And1 =
        Builder.CreateAnd(LHS: AAddr1Off0, RHS: Builder.getIntN(N: FloatWidth, C: 0xFFFFFFFF));
    Value *Or1 = Builder.CreateOr(LHS: Shl1, RHS: And1);
    A4 = Builder.CreateBitCast(V: Or1, DestTy: IToFP->getType());
  } else if (IToFP->getType()->isX86_FP80Ty()) {
    Value *A40 =
        Builder.CreateBitCast(V: Or35, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
    A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
  } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
    // Deal with the "half" situation. This is a workaround: since there is no
    // floattihf.c to refer to, convert via fp32 and truncate.
    Value *A40 =
        Builder.CreateBitCast(V: Or35, DestTy: Type::getFloatTy(C&: Builder.getContext()));
    A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
  } else // float type
    A4 = Builder.CreateBitCast(V: Or35, DestTy: IToFP->getType());
  Builder.CreateBr(Dest: End);

  // return: merge the converted value with the +0.0 fast path from entry.
  Builder.SetInsertPoint(TheBB: End, IP: End->begin());
  PHINode *Retval0 = Builder.CreatePHI(Ty: IToFP->getType(), NumReservedValues: 2);
  Retval0->addIncoming(V: A4, BB: IfEnd26);
  Retval0->addIncoming(V: ConstantFP::getZero(Ty: IToFP->getType(), Negative: false), BB: Entry);

  IToFP->replaceAllUsesWith(V: Retval0);
  IToFP->dropAllReferences();
  IToFP->eraseFromParent();
}
1118
1119static void scalarize(Instruction *I,
1120 SmallVectorImpl<Instruction *> &Worklist) {
1121 VectorType *VTy = cast<FixedVectorType>(Val: I->getType());
1122
1123 IRBuilder<> Builder(I);
1124
1125 unsigned NumElements = VTy->getElementCount().getFixedValue();
1126 Value *Result = PoisonValue::get(T: VTy);
1127 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
1128 Value *Ext = Builder.CreateExtractElement(Vec: I->getOperand(i: 0), Idx);
1129
1130 Value *NewOp = nullptr;
1131 if (auto *BinOp = dyn_cast<BinaryOperator>(Val: I))
1132 NewOp = Builder.CreateBinOp(
1133 Opc: BinOp->getOpcode(), LHS: Ext,
1134 RHS: Builder.CreateExtractElement(Vec: I->getOperand(i: 1), Idx));
1135 else if (auto *CastI = dyn_cast<CastInst>(Val: I))
1136 NewOp = Builder.CreateCast(Op: CastI->getOpcode(), V: Ext,
1137 DestTy: I->getType()->getScalarType());
1138 else
1139 llvm_unreachable("Unsupported instruction type");
1140
1141 Result = Builder.CreateInsertElement(Vec: Result, NewElt: NewOp, Idx);
1142 if (auto *ScalarizedI = dyn_cast<Instruction>(Val: NewOp)) {
1143 ScalarizedI->copyIRFlags(V: I, IncludeWrapFlags: true);
1144 Worklist.push_back(Elt: ScalarizedI);
1145 }
1146 }
1147
1148 I->replaceAllUsesWith(V: Result);
1149 I->dropAllReferences();
1150 I->eraseFromParent();
1151}
1152
1153static void addToWorklist(Instruction &I,
1154 SmallVector<Instruction *, 4> &Worklist) {
1155 if (I.getOperand(i: 0)->getType()->isVectorTy())
1156 scalarize(I: &I, Worklist);
1157 else
1158 Worklist.push_back(Elt: &I);
1159}
1160
/// Scan \p F for instructions this pass must expand (see the file header
/// comment) and rewrite each one in place. Returns true if \p F was changed.
///
/// \p AC may be null; when present it seeds the SimplifyQuery used by the
/// frem expansion.
/// NOTE(review): \p Libcalls is accepted but never referenced in this
/// function body — presumably kept for interface parity; confirm.
static bool runImpl(Function &F, const TargetLowering &TLI,
                    const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) {
  SmallVector<Instruction *, 4> Worklist;

  // Command-line overrides (when set) take precedence over the target's
  // reported limits.
  unsigned MaxLegalFpConvertBitWidth =
      TLI.getMaxLargeFPConvertBitWidthSupported();
  if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
    MaxLegalFpConvertBitWidth = ExpandFpConvertBits;

  unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
  if (ExpandDivRemBits != llvm::IntegerType::MAX_INT_BITS)
    MaxLegalDivRemBitWidth = ExpandDivRemBits;

  // A limit at or above MAX_INT_BITS means no expansion of that kind can
  // ever trigger.
  bool DisableExpandLargeFp =
      MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
  bool DisableExpandLargeDivRem =
      MaxLegalDivRemBitWidth >= llvm::IntegerType::MAX_INT_BITS;
  bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);

  if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
    return false;

  // Predicate deciding whether a given instruction needs expansion.
  auto ShouldHandleInst = [&](Instruction &I) {
    Type *Ty = I.getType();
    // TODO: This pass doesn't handle scalable vectors.
    if (Ty->isScalableTy())
      return false;

    switch (I.getOpcode()) {
    case Instruction::FRem:
      return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
    case Instruction::FPToUI:
    case Instruction::FPToSI:
      return !DisableExpandLargeFp &&
             cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
                 MaxLegalFpConvertBitWidth;
    case Instruction::UIToFP:
    case Instruction::SIToFP:
      // For int-to-fp conversions the *source* type determines the width.
      return !DisableExpandLargeFp &&
             cast<IntegerType>(Val: I.getOperand(i: 0)->getType()->getScalarType())
                     ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      // Power-of-2 divisors are handled inside the expansion (via efficient
      // shift/mask sequences) rather than being excluded here, so that
      // backends that cannot lower wide div/rem even for powers of two
      // (e.g. when DAGCombiner is disabled) still get valid lowered code.
      return !DisableExpandLargeDivRem &&
             cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
                 MaxLegalDivRemBitWidth;
    case Instruction::Call: {
      // Saturating fp-to-int intrinsics follow the same width rule as the
      // plain fptoui/fptosi instructions.
      auto *II = dyn_cast<IntrinsicInst>(Val: &I);
      if (II && (II->getIntrinsicID() == Intrinsic::fptoui_sat ||
                 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
        return !DisableExpandLargeFp &&
               cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
                   MaxLegalFpConvertBitWidth;
      }
      return false;
    }
    }

    return false;
  };

  bool Modified = false;
  // Collect first, expand afterwards: expansion mutates the instruction
  // stream, so it must not run while iterating over it.
  for (auto It = inst_begin(F: &F), End = inst_end(F); It != End;) {
    Instruction &I = *It++;
    if (!ShouldHandleInst(I))
      continue;

    addToWorklist(I, Worklist);
    Modified = true;
  }

  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();

    switch (I->getOpcode()) {
    case Instruction::FRem: {
      // Build a SimplifyQuery seeded with the assumption cache when one is
      // available so the expansion can exploit assumptions.
      auto SQ = [&]() -> std::optional<SimplifyQuery> {
        if (AC) {
          auto Res = std::make_optional<SimplifyQuery>(
              args: I->getModule()->getDataLayout(), args&: I);
          Res->AC = AC;
          return Res;
        }
        return {};
      }();

      expandFRem(I&: cast<BinaryOperator>(Val&: *I), SQ);
      break;
    }

    case Instruction::FPToUI:
      expandFPToI(FPToI: I, /*IsSaturating=*/false, /*IsSigned=*/false);
      break;
    case Instruction::FPToSI:
      expandFPToI(FPToI: I, /*IsSaturating=*/false, /*IsSigned=*/true);
      break;

    case Instruction::UIToFP:
    case Instruction::SIToFP:
      expandIToFP(IToFP: I);
      break;

    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem: {
      auto *BO = cast<BinaryOperator>(Val: I);
      // TODO: isConstantPowerOfTwo does not handle vector constants, so
      // vector div/rem by a power-of-2 splat goes through the generic path.
      if (isConstantPowerOfTwo(V: BO->getOperand(i_nocapture: 1), SignedOp: isSigned(Opcode: BO->getOpcode()))) {
        expandPow2DivRem(BO);
      } else {
        unsigned Opc = BO->getOpcode();
        if (Opc == Instruction::UDiv || Opc == Instruction::SDiv)
          expandDivision(Div: BO);
        else
          expandRemainder(Rem: BO);
      }
      break;
    }
    case Instruction::Call: {
      auto *II = cast<IntrinsicInst>(Val: I);
      assert(II->getIntrinsicID() == Intrinsic::fptoui_sat ||
             II->getIntrinsicID() == Intrinsic::fptosi_sat);
      expandFPToI(FPToI: I, /*IsSaturating=*/true,
                  /*IsSigned=*/II->getIntrinsicID() == Intrinsic::fptosi_sat);
      break;
    }
    }
  }

  return Modified;
}
1300
1301namespace {
1302class ExpandIRInstsLegacyPass : public FunctionPass {
1303 CodeGenOptLevel OptLevel;
1304
1305public:
1306 static char ID;
1307
1308 ExpandIRInstsLegacyPass(CodeGenOptLevel OptLevel)
1309 : FunctionPass(ID), OptLevel(OptLevel) {}
1310
1311 ExpandIRInstsLegacyPass() : ExpandIRInstsLegacyPass(CodeGenOptLevel::None) {};
1312
1313 bool runOnFunction(Function &F) override {
1314 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1315 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);
1316 auto *TLI = Subtarget->getTargetLowering();
1317 AssumptionCache *AC = nullptr;
1318
1319 const LibcallLoweringInfo &Libcalls =
1320 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1321 M: *F.getParent(), Subtarget: *Subtarget);
1322
1323 if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
1324 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1325 return runImpl(F, TLI: *TLI, Libcalls, AC);
1326 }
1327
1328 void getAnalysisUsage(AnalysisUsage &AU) const override {
1329 AU.addRequired<LibcallLoweringInfoWrapper>();
1330 AU.addRequired<TargetPassConfig>();
1331 if (OptLevel != CodeGenOptLevel::None)
1332 AU.addRequired<AssumptionCacheTracker>();
1333 AU.addPreserved<AAResultsWrapperPass>();
1334 AU.addPreserved<GlobalsAAWrapperPass>();
1335 AU.addRequired<LibcallLoweringInfoWrapper>();
1336 }
1337};
1338} // namespace
1339
// New-PM pass constructor: remember the target machine (used in run() to get
// the subtarget's TargetLowering) and the optimization level (gates use of
// AssumptionAnalysis).
ExpandIRInstsPass::ExpandIRInstsPass(const TargetMachine &TM,
                                     CodeGenOptLevel OptLevel)
    : TM(&TM), OptLevel(OptLevel) {}
1343
1344void ExpandIRInstsPass::printPipeline(
1345 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1346 static_cast<PassInfoMixin<ExpandIRInstsPass> *>(this)->printPipeline(
1347 OS, MapClassName2PassName);
1348 OS << '<';
1349 OS << "O" << (int)OptLevel;
1350 OS << '>';
1351}
1352
// New-PM entry point: gather TargetLowering, the optional AssumptionCache,
// and the cached libcall lowering info, then delegate to runImpl.
PreservedAnalyses ExpandIRInstsPass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
  auto &TLI = *STI->getTargetLowering();
  AssumptionCache *AC = nullptr;
  if (OptLevel != CodeGenOptLevel::None)
    AC = &FAM.getResult<AssumptionAnalysis>(IR&: F);

  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);

  // Only a *cached* module-level result can be queried from a function pass;
  // the pipeline must have computed LibcallLoweringModuleAnalysis already.
  const LibcallLoweringModuleAnalysisResult *LibcallLowering =
      MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(IR&: *F.getParent());

  if (!LibcallLowering) {
    F.getContext().emitError(ErrorStr: "'" + LibcallLoweringModuleAnalysis::name() +
                             "' analysis required");
    return PreservedAnalyses::all();
  }

  const LibcallLoweringInfo &Libcalls =
      LibcallLowering->getLibcallLowering(Subtarget: *STI);

  return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none()
                                       : PreservedAnalyses::all();
}
1378
1379char ExpandIRInstsLegacyPass::ID = 0;
1380INITIALIZE_PASS_BEGIN(ExpandIRInstsLegacyPass, "expand-ir-insts",
1381 "Expand certain fp instructions", false, false)
1382INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
1383INITIALIZE_PASS_END(ExpandIRInstsLegacyPass, "expand-ir-insts",
1384 "Expand IR instructions", false, false)
1385
// Factory for the legacy-PM version of this pass.
FunctionPass *llvm::createExpandIRInstsPass(CodeGenOptLevel OptLevel) {
  return new ExpandIRInstsLegacyPass(OptLevel);
}
1389