1//===--- ExpandIRInsts.cpp - Expand IR instructions -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass expands certain instructions at the IR level.
9//
10// The following expansions are implemented:
11// - Expansion of ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp
12// .. to’ instructions with a bitwidth above a threshold. This is
// useful for targets like x86_64 that cannot lower fp conversions
14// with more than 128 bits.
15//
16// - Expansion of ‘frem‘ for types MVT::f16, MVT::f32, and MVT::f64 for
17// targets which use "Expand" as the legalization action for the
18// corresponding type.
19//
20// - Expansion of ‘udiv‘, ‘sdiv‘, ‘urem‘, and ‘srem‘ instructions with
21// a bitwidth above a threshold into a call to auto-generated
22// functions. This is useful for targets like x86_64 that cannot
23// lower divisions with more than 128 bits or targets like x86_32 that
24// cannot lower divisions with more than 64 bits.
25//
26// Instructions with vector types are scalarized first if their scalar
27// types can be expanded. Scalable vector types are not supported.
28//===----------------------------------------------------------------------===//
29
30#include "llvm/CodeGen/ExpandIRInsts.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/Analysis/AssumptionCache.h"
33#include "llvm/Analysis/GlobalsModRef.h"
34#include "llvm/Analysis/SimplifyQuery.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/CodeGen/ISDOpcodes.h"
37#include "llvm/CodeGen/Passes.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/TargetPassConfig.h"
40#include "llvm/CodeGen/TargetSubtargetInfo.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstIterator.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/InitializePasses.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/CommandLine.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Target/TargetMachine.h"
51#include "llvm/Transforms/Utils/BasicBlockUtils.h"
52#include "llvm/Transforms/Utils/IntegerDivision.h"
53#include <llvm/Support/Casting.h>
54#include <optional>
55
56#define DEBUG_TYPE "expand-ir-insts"
57
58using namespace llvm;
59
// Bit-width threshold above which fp<->int conversion instructions are
// expanded by this pass. The default of MAX_INT_BITS presumably means the
// flag alone never triggers expansion; targets/users lower it as needed —
// TODO confirm against the pass driver.
static cl::opt<unsigned>
    ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
                        cl::init(Val: llvm::IntegerType::MAX_INT_BITS),
                        cl::desc("fp convert instructions on integers with "
                                 "more than <N> bits are expanded."));

// Bit-width threshold above which udiv/sdiv/urem/srem instructions are
// expanded; same defaulting scheme as ExpandFpConvertBits above.
static cl::opt<unsigned>
    ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
                     cl::init(Val: llvm::IntegerType::MAX_INT_BITS),
                     cl::desc("div and rem instructions on integers with "
                              "more than <N> bits are expanded."));
71
72namespace {
73bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) {
74 auto *C = dyn_cast<ConstantInt>(Val: V);
75 if (!C)
76 return false;
77
78 APInt Val = C->getValue();
79 if (SignedOp && Val.isNegative())
80 Val = -Val;
81 return Val.isPowerOf2();
82}
83
84bool isSigned(unsigned int Opcode) {
85 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
86}
87
/// This class implements a precise expansion of the frem instruction.
/// The generated code is based on the fmod implementation in the AMD device
/// libs.
class FRemExpander {
  /// The IRBuilder to use for the expansion.
  IRBuilder<> &B;

  /// Floating point type of the return value and the arguments of the FRem
  /// instructions that should be expanded.
  Type *FremTy;

  /// Floating point type to use for the computation. This may be
  /// wider than the \p FremTy.
  Type *ComputeFpTy;

  /// Integer type used to hold the exponents returned by frexp.
  Type *ExTy;

  /// How many bits of the quotient to compute per iteration of the
  /// algorithm, stored as a value of type \p ExTy.
  Value *Bits;

  /// Constant 1 of type \p ExTy.
  Value *One;

  /// The frem argument/return types that can be expanded by this class.
  // TODO: The expansion could work for other floating point types
  // as well, but this would require additional testing.
  static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
                                                      MVT::f64};

public:
  /// Return true if \p Ty is one of the simple types in ExpandableTypes.
  static bool canExpandType(Type *Ty) {
    EVT VT = EVT::getEVT(Ty);
    assert(VT.isSimple() && "Can expand only simple types");

    return is_contained(Range: ExpandableTypes, Element: VT.getSimpleVT());
  }

  /// Return true if the target represented by \p TLI uses the "Expand"
  /// legalization action for frem on the scalar type \p VT.
  static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
    assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
    return TLI.getOperationAction(Op: ISD::FREM, VT) ==
           TargetLowering::LegalizeAction::Expand;
  }

  /// Overload of the above that queries the scalar type of \p Ty.
  static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
    // Consider scalar type for simplicity. It seems unlikely that a
    // vector type can be legalized without expansion if the scalar
    // type cannot.
    return shouldExpandFremType(TLI, VT: EVT::getEVT(Ty: Ty->getScalarType()));
  }

  /// Return true if the pass should expand frem instructions of any type
  /// for the target represented by \p TLI.
  static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
    return any_of(Range: ExpandableTypes,
                  P: [&](MVT V) { return shouldExpandFremType(TLI, VT: EVT(V)); });
  }

  /// Create an expander for frem instructions of type \p Ty whose code
  /// is emitted through \p B. \p Ty must satisfy canExpandType.
  static FRemExpander create(IRBuilder<> &B, Type *Ty) {
    assert(canExpandType(Ty) && "Expected supported floating point type");

    // The type to use for the computation of the remainder. This may be
    // wider than the input/result type which affects the ...
    Type *ComputeTy = Ty;
    // ... maximum number of iterations of the remainder computation loop
    // to use. This value is for the case in which the computation
    // uses the same input/result type.
    unsigned MaxIter = 2;

    if (Ty->isHalfTy()) {
      // Use the wider type and less iterations.
      ComputeTy = B.getFloatTy();
      MaxIter = 1;
    }

    unsigned Precision =
        llvm::APFloat::semanticsPrecision(Ty->getFltSemantics());
    return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
  }

  /// Build the FRem expansion for the numerator \p X and the
  /// denominator \p Y. The type of X and Y must match \p FremTy. The
  /// code will be generated at the insertion point of \p B and the
  /// insertion point will be reset at exit.
  Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;

  /// Build an approximate FRem expansion for the numerator \p X and
  /// the denominator \p Y at the insertion point of builder \p B.
  /// The type of X and Y must match \p FremTy.
  Value *buildApproxFRem(Value *X, Value *Y) const;

private:
  FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
      : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
        Bits(ConstantInt::get(Ty: ExTy, V: Bits)), One(ConstantInt::get(Ty: ExTy, V: 1)) {};

  /// Build a reciprocal of \p V as a plain 1.0/V division.
  Value *createRcp(Value *V, const Twine &Name) const {
    // Leave it to later optimizations to turn this into an rcp
    // instruction if available.
    return B.CreateFDiv(L: ConstantFP::get(Ty: ComputeFpTy, V: 1.0), R: V, Name);
  }

  // Helper function to build the UPDATE_AX code which is common to the
  // loop body and the "final iteration".
  Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
    // Build:
    //   float q = rint(ax * ayinv);
    //   ax = fma(-q, ay, ax);
    //   int clt = ax < 0.0f;
    //   float axp = ax + ay;
    //   ax = clt ? axp : ax;
    Value *Q = B.CreateUnaryIntrinsic(ID: Intrinsic::rint, V: B.CreateFMul(L: Ax, R: Ayinv),
                                      FMFSource: {}, Name: "q");
    Value *AxUpdate = B.CreateFMA(Factor1: B.CreateFNeg(V: Q), Factor2: Ay, Summand: Ax, FMFSource: {}, Name: "ax");
    Value *Clt = B.CreateFCmp(P: CmpInst::FCMP_OLT, LHS: AxUpdate,
                              RHS: ConstantFP::getZero(Ty: ComputeFpTy), Name: "clt");
    Value *Axp = B.CreateFAdd(L: AxUpdate, R: Ay, Name: "axp");
    return B.CreateSelect(C: Clt, True: Axp, False: AxUpdate, Name: "ax");
  }

  /// Build code to extract the exponent and mantissa of \p Src.
  /// Return the exponent minus one for use as a loop bound and
  /// the mantissa taken to the given \p NewExp power.
  std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
                                               const Twine &ExName,
                                               const Twine &PowName) const {
    // Build:
    //   ExName = frexp_exp(Src) - 1;
    //   PowName = fldexp(frexp_mant(ExName), NewExp);
    Type *Ty = Src->getType();
    Type *ExTy = B.getInt32Ty();
    Value *Frexp = B.CreateIntrinsic(ID: Intrinsic::frexp, Types: {Ty, ExTy}, Args: Src);
    Value *Mant = B.CreateExtractValue(Agg: Frexp, Idxs: {0});
    Value *Exp = B.CreateExtractValue(Agg: Frexp, Idxs: {1});

    Exp = B.CreateSub(LHS: Exp, RHS: One, Name: ExName);
    Value *Pow = B.CreateLdexp(Src: Mant, Exp: NewExp, FMFSource: {}, Name: PowName);

    return {Pow, Exp};
  }

  /// Build the main computation of the remainder for the case in which
  /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the
  /// denominator. Add the incoming edge from the computation result
  /// to \p RetPhi.
  void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
                                 PHINode *RetPhi, FastMathFlags FMF) const {
    // The guard restores the builder's previous fast-math flags at exit.
    IRBuilder<>::FastMathFlagGuard Guard(B);
    B.setFastMathFlags(FMF);

    // Build:
    //   ex = frexp_exp(ax) - 1;
    //   ax = fldexp(frexp_mant(ax), bits);
    //   ey = frexp_exp(ay) - 1;
    //   ay = fldexp(frexp_mant(ay), 1);
    auto [Ax, Ex] = buildExpAndPower(Src: AxInitial, NewExp: Bits, ExName: "ex", PowName: "ax");
    auto [Ay, Ey] = buildExpAndPower(Src: AyInitial, NewExp: One, ExName: "ey", PowName: "ay");

    // Build:
    //   int nb = ex - ey;
    //   float ayinv = 1.0/ay;
    Value *Nb = B.CreateSub(LHS: Ex, RHS: Ey, Name: "nb");
    Value *Ayinv = createRcp(V: Ay, Name: "ayinv");

    // Build: while (nb > bits)
    BasicBlock *PreheaderBB = B.GetInsertBlock();
    Function *Fun = PreheaderBB->getParent();
    auto *LoopBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.loop_body", Parent: Fun);
    auto *ExitBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.loop_exit", Parent: Fun);

    B.CreateCondBr(Cond: B.CreateICmp(P: CmpInst::ICMP_SGT, LHS: Nb, RHS: Bits), True: LoopBB, False: ExitBB);

    // Build loop body:
    //   UPDATE_AX
    //   ax = fldexp(ax, bits);
    //   nb -= bits;
    // One iteration of the loop is factored out. The code shared by
    // the loop and this "iteration" is denoted by UPDATE_AX.
    B.SetInsertPoint(LoopBB);
    PHINode *NbIv = B.CreatePHI(Ty: Nb->getType(), NumReservedValues: 2, Name: "nb_iv");
    NbIv->addIncoming(V: Nb, BB: PreheaderBB);

    auto *AxPhi = B.CreatePHI(Ty: ComputeFpTy, NumReservedValues: 2, Name: "ax_loop_phi");
    AxPhi->addIncoming(V: Ax, BB: PreheaderBB);

    Value *AxPhiUpdate = buildUpdateAx(Ax: AxPhi, Ay, Ayinv);
    AxPhiUpdate = B.CreateLdexp(Src: AxPhiUpdate, Exp: Bits, FMFSource: {}, Name: "ax_update");
    AxPhi->addIncoming(V: AxPhiUpdate, BB: LoopBB);
    NbIv->addIncoming(V: B.CreateSub(LHS: NbIv, RHS: Bits, Name: "nb_update"), BB: LoopBB);

    B.CreateCondBr(Cond: B.CreateICmp(P: CmpInst::ICMP_SGT, LHS: NbIv, RHS: Bits), True: LoopBB, False: ExitBB);

    // Build final iteration
    //   ax = fldexp(ax, nb - bits + 1);
    //   UPDATE_AX
    B.SetInsertPoint(ExitBB);

    auto *AxPhiExit = B.CreatePHI(Ty: ComputeFpTy, NumReservedValues: 2, Name: "ax_exit_phi");
    AxPhiExit->addIncoming(V: Ax, BB: PreheaderBB);
    AxPhiExit->addIncoming(V: AxPhi, BB: LoopBB);
    auto *NbExitPhi = B.CreatePHI(Ty: Nb->getType(), NumReservedValues: 2, Name: "nb_exit_phi");
    NbExitPhi->addIncoming(V: NbIv, BB: LoopBB);
    NbExitPhi->addIncoming(V: Nb, BB: PreheaderBB);

    Value *AxFinal = B.CreateLdexp(
        Src: AxPhiExit, Exp: B.CreateAdd(LHS: B.CreateSub(LHS: NbExitPhi, RHS: Bits), RHS: One), FMFSource: {}, Name: "ax");
    AxFinal = buildUpdateAx(Ax: AxFinal, Ay, Ayinv);

    // Build:
    //   ax = fldexp(ax, ey);
    //   ret = copysign(ax,x);
    AxFinal = B.CreateLdexp(Src: AxFinal, Exp: Ey, FMFSource: {}, Name: "ax");
    // Narrow back to the frem type if a wider compute type was used.
    if (ComputeFpTy != FremTy)
      AxFinal = B.CreateFPTrunc(V: AxFinal, DestTy: FremTy);
    Value *Ret = B.CreateCopySign(LHS: AxFinal, RHS: X);

    RetPhi->addIncoming(V: Ret, BB: ExitBB);
  }

  /// Build the else-branch of the conditional in the FRem
  /// expansion, i.e. the case in which Ax <= Ay, where Ax = |X|, Ay
  /// = |Y|, and X is the numerator and Y the denominator. Add the
  /// incoming edge from the result to \p RetPhi.
  void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
    // Build:
    //   ret = ax == ay ? copysign(0.0f, x) : x;
    Value *ZeroWithXSign = B.CreateCopySign(LHS: ConstantFP::getZero(Ty: FremTy), RHS: X);
    Value *Ret = B.CreateSelect(C: B.CreateFCmpOEQ(LHS: Ax, RHS: Ay), True: ZeroWithXSign, False: X);

    RetPhi->addIncoming(V: Ret, BB: B.GetInsertBlock());
  }

  /// Return a value that is NaN if one of the corner cases concerning
  /// the inputs \p X and \p Y is detected, and \p Ret otherwise.
  Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,
                                std::optional<SimplifyQuery> &SQ,
                                bool NoInfs) const {
    // Build:
    //   ret = (y == 0.0f || isnan(y)) ? QNAN : ret;
    //   ret = isfinite(x) ? ret : QNAN;
    Value *Nan = ConstantFP::getQNaN(Ty: FremTy);
    // The unordered compare catches both y == 0 and y == NaN.
    Ret = B.CreateSelect(C: B.CreateFCmpUEQ(LHS: Y, RHS: ConstantFP::getZero(Ty: FremTy)), True: Nan,
                         False: Ret);
    // Skip the finiteness check if infinities are excluded by the flags
    // or can be proven absent via the simplify query.
    Value *XFinite =
        NoInfs || (SQ && isKnownNeverInfinity(V: X, SQ: *SQ))
            ? B.getTrue()
            : B.CreateFCmpULT(LHS: B.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X),
                              RHS: ConstantFP::getInfinity(Ty: FremTy));
    Ret = B.CreateSelect(C: XFinite, True: Ret, False: Nan);

    return Ret;
  }
};
342
343Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {
344 IRBuilder<>::FastMathFlagGuard Guard(B);
345 // Propagating the approximate functions flag to the
346 // division leads to an unacceptable drop in precision
347 // on AMDGPU.
348 // TODO Find out if any flags might be worth propagating.
349 B.clearFastMathFlags();
350
351 Value *Quot = B.CreateFDiv(L: X, R: Y);
352 Value *Trunc = B.CreateUnaryIntrinsic(ID: Intrinsic::trunc, V: Quot, FMFSource: {});
353 Value *Neg = B.CreateFNeg(V: Trunc);
354
355 return B.CreateFMA(Factor1: Neg, Factor2: Y, Summand: X);
356}
357
358Value *FRemExpander::buildFRem(Value *X, Value *Y,
359 std::optional<SimplifyQuery> &SQ) const {
360 assert(X->getType() == FremTy && Y->getType() == FremTy);
361
362 FastMathFlags FMF = B.getFastMathFlags();
363
364 // This function generates the following code structure:
365 // if (abs(x) > abs(y))
366 // { ret = compute remainder }
367 // else
368 // { ret = x or 0 with sign of x }
369 // Adjust ret to NaN/inf in input
370 // return ret
371 Value *Ax = B.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: X, FMFSource: {}, Name: "ax");
372 Value *Ay = B.CreateUnaryIntrinsic(ID: Intrinsic::fabs, V: Y, FMFSource: {}, Name: "ay");
373 if (ComputeFpTy != X->getType()) {
374 Ax = B.CreateFPExt(V: Ax, DestTy: ComputeFpTy, Name: "ax");
375 Ay = B.CreateFPExt(V: Ay, DestTy: ComputeFpTy, Name: "ay");
376 }
377 Value *AxAyCmp = B.CreateFCmpOGT(LHS: Ax, RHS: Ay);
378
379 PHINode *RetPhi = B.CreatePHI(Ty: FremTy, NumReservedValues: 2, Name: "ret");
380 Value *Ret = RetPhi;
381
382 // We would return NaN in all corner cases handled here.
383 // Hence, if NaNs are excluded, keep the result as it is.
384 if (!FMF.noNaNs())
385 Ret = handleInputCornerCases(Ret, X, Y, SQ, NoInfs: FMF.noInfs());
386
387 Function *Fun = B.GetInsertBlock()->getParent();
388 auto *ThenBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.compute", Parent: Fun);
389 auto *ElseBB = BasicBlock::Create(Context&: B.getContext(), Name: "frem.else", Parent: Fun);
390 SplitBlockAndInsertIfThenElse(Cond: AxAyCmp, SplitBefore: RetPhi, ThenBlock: &ThenBB, ElseBlock: &ElseBB);
391
392 auto SavedInsertPt = B.GetInsertPoint();
393
394 // Build remainder computation for "then" branch
395 //
396 // The ordered comparison ensures that ax and ay are not NaNs
397 // in the then-branch. Furthermore, y cannot be an infinity and the
398 // check at the end of the function ensures that the result will not
399 // be used if x is an infinity.
400 FastMathFlags ComputeFMF = FMF;
401 ComputeFMF.setNoInfs();
402 ComputeFMF.setNoNaNs();
403
404 B.SetInsertPoint(ThenBB);
405 buildRemainderComputation(AxInitial: Ax, AyInitial: Ay, X, RetPhi, FMF);
406 B.CreateBr(Dest: RetPhi->getParent());
407
408 // Build "else"-branch
409 B.SetInsertPoint(ElseBB);
410 buildElseBranch(Ax, Ay, X, RetPhi);
411 B.CreateBr(Dest: RetPhi->getParent());
412
413 B.SetInsertPoint(SavedInsertPt);
414
415 return Ret;
416}
417} // namespace
418
419static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
420 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
421
422 Type *Ty = I.getType();
423 assert(FRemExpander::canExpandType(Ty) &&
424 "Expected supported floating point type");
425
426 FastMathFlags FMF = I.getFastMathFlags();
427 // TODO Make use of those flags for optimization?
428 FMF.setAllowReciprocal(false);
429 FMF.setAllowContract(false);
430
431 IRBuilder<> B(&I);
432 B.setFastMathFlags(FMF);
433 B.SetCurrentDebugLocation(I.getDebugLoc());
434
435 const FRemExpander Expander = FRemExpander::create(B, Ty);
436 Value *Ret = FMF.approxFunc()
437 ? Expander.buildApproxFRem(X: I.getOperand(i_nocapture: 0), Y: I.getOperand(i_nocapture: 1))
438 : Expander.buildFRem(X: I.getOperand(i_nocapture: 0), Y: I.getOperand(i_nocapture: 1), SQ);
439
440 I.replaceAllUsesWith(V: Ret);
441 Ret->takeName(V: &I);
442 I.eraseFromParent();
443
444 return true;
445}
446// clang-format off: preserve formatting of the following example
447
448/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
449/// the generated code. This currently generates code similarly to compiler-rt's
450/// implementations.
451///
452/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
453/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
454/// entry:
455/// %0 = bitcast float %a to i32
456/// %conv.i = zext i32 %0 to i64
457/// %tobool.not = icmp sgt i32 %0, -1
458/// %conv = select i1 %tobool.not, i64 1, i64 -1
459/// %and = lshr i64 %conv.i, 23
460/// %shr = and i64 %and, 255
461/// %and2 = and i64 %conv.i, 8388607
462/// %or = or i64 %and2, 8388608
463/// %cmp = icmp ult i64 %shr, 127
464/// br i1 %cmp, label %cleanup, label %if.end
465///
466/// if.end: ; preds = %entry
467/// %sub = add nuw nsw i64 %shr, 4294967169
468/// %conv5 = and i64 %sub, 4294967232
469/// %cmp6.not = icmp eq i64 %conv5, 0
470/// br i1 %cmp6.not, label %if.end12, label %if.then8
471///
472/// if.then8: ; preds = %if.end
473/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64
474/// -9223372036854775808 br label %cleanup
475///
476/// if.end12: ; preds = %if.end
477/// %cmp13 = icmp ult i64 %shr, 150
478/// br i1 %cmp13, label %if.then15, label %if.else
479///
480/// if.then15: ; preds = %if.end12
481/// %sub16 = sub nuw nsw i64 150, %shr
482/// %shr17 = lshr i64 %or, %sub16
483/// %mul = mul nsw i64 %shr17, %conv
484/// br label %cleanup
485///
486/// if.else: ; preds = %if.end12
487/// %sub18 = add nsw i64 %shr, -150
488/// %shl = shl i64 %or, %sub18
489/// %mul19 = mul nsw i64 %shl, %conv
490/// br label %cleanup
491///
492/// cleanup: ; preds = %entry,
493/// %if.else, %if.then15, %if.then8
494/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [
495/// %mul19, %if.else ], [ 0, %entry ] ret i64 %retval.0
496/// }
497///
498/// Replace fp to integer with generated code.
// Expand the fp-to-int conversion \p FPToI (fptoui/fptosi, optionally the
// saturating intrinsic form) into explicit bit manipulation of the float's
// representation, then erase the original instruction.
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned) {
  // clang-format on
  IRBuilder<> Builder(FPToI);
  auto *FloatVal = FPToI->getOperand(i: 0);
  IntegerType *IntTy = cast<IntegerType>(Val: FPToI->getType());

  unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
  // Mantissa width excluding the implicit leading bit.
  unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;

  // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
  // to i32 first following a sext/zext to target integer type.
  Value *A1 = nullptr;
  if (FloatVal->getType()->isHalfTy() && BitWidth >= 32) {
    if (FPToI->getOpcode() == Instruction::FPToUI) {
      Value *A0 = Builder.CreateFPToUI(V: FloatVal, DestTy: Builder.getInt32Ty());
      A1 = Builder.CreateZExt(V: A0, DestTy: IntTy);
    } else { // FPToSI
      Value *A0 = Builder.CreateFPToSI(V: FloatVal, DestTy: Builder.getInt32Ty());
      A1 = Builder.CreateSExt(V: A0, DestTy: IntTy);
    }
    FPToI->replaceAllUsesWith(V: A1);
    FPToI->dropAllReferences();
    FPToI->eraseFromParent();
    return;
  }

  // fp80 conversion is implemented by fpext to fp128 first then do the
  // conversion.
  FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
  unsigned FloatWidth =
      PowerOf2Ceil(A: FloatVal->getType()->getScalarSizeInBits());
  unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
  unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
  IntegerType *FloatIntTy = Builder.getIntNTy(N: FloatWidth);
  // Constants for reassembling the significand from the raw bits.
  Value *ImplicitBit = ConstantInt::get(
      Ty: FloatIntTy, V: APInt::getOneBitSet(numBits: FloatWidth, BitNo: FPMantissaWidth));
  Value *SignificandMask = ConstantInt::get(
      Ty: FloatIntTy, V: APInt::getLowBitsSet(numBits: FloatWidth, loBitsSet: FPMantissaWidth));

  // Carve the CFG skeleton out of the current block; all new blocks are
  // inserted before the cleanup block "End".
  BasicBlock *Entry = Builder.GetInsertBlock();
  Function *F = Entry->getParent();
  Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
  BasicBlock *CheckSaturateBB, *SaturateBB;
  BasicBlock *End =
      Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "fp-to-i-cleanup");
  if (IsSaturating) {
    CheckSaturateBB = BasicBlock::Create(Context&: Builder.getContext(),
                                         Name: "fp-to-i-if-check.saturate", Parent: F, InsertBefore: End);
    SaturateBB =
        BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-saturate", Parent: F, InsertBefore: End);
  }
  BasicBlock *CheckExpSizeBB = BasicBlock::Create(
      Context&: Builder.getContext(), Name: "fp-to-i-if-check.exp.size", Parent: F, InsertBefore: End);
  BasicBlock *ExpSmallBB =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-exp.small", Parent: F, InsertBefore: End);
  BasicBlock *ExpLargeBB =
      BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-exp.large", Parent: F, InsertBefore: End);

  // Drop the unconditional branch created by splitBasicBlock; the entry
  // terminator is rebuilt below.
  Entry->getTerminator()->eraseFromParent();

  // entry:
  Builder.SetInsertPoint(Entry);
  // We're going to introduce branches on the value, so freeze it.
  if (!isGuaranteedNotToBeUndefOrPoison(V: FloatVal))
    FloatVal = Builder.CreateFreeze(V: FloatVal);
  // fp80 conversion is implemented by fpext to fp128 first then do the
  // conversion.
  if (FloatVal->getType()->isX86_FP80Ty())
    FloatVal =
        Builder.CreateFPExt(V: FloatVal, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
  Value *ARep = Builder.CreateBitCast(V: FloatVal, DestTy: FloatIntTy);
  Value *PosOrNeg, *Sign;
  if (IsSigned) {
    // Sign is +1/-1 depending on the sign bit of the raw representation.
    PosOrNeg =
        Builder.CreateICmpSGT(LHS: ARep, RHS: ConstantInt::getSigned(Ty: FloatIntTy, V: -1));
    Sign = Builder.CreateSelect(C: PosOrNeg, True: ConstantInt::getSigned(Ty: IntTy, V: 1),
                                False: ConstantInt::getSigned(Ty: IntTy, V: -1), Name: "sign");
  }
  // Extract biased exponent and significand fields from the raw bits.
  Value *And =
      Builder.CreateLShr(LHS: ARep, RHS: Builder.getIntN(N: FloatWidth, C: FPMantissaWidth));
  Value *BiasedExp = Builder.CreateAnd(
      LHS: And, RHS: Builder.getIntN(N: FloatWidth, C: (1 << ExponentWidth) - 1), Name: "biased.exp");
  Value *Abs = Builder.CreateAnd(LHS: ARep, RHS: SignificandMask);
  Value *Significand = Builder.CreateOr(LHS: Abs, RHS: ImplicitBit, Name: "significand");
  // |value| < 1 (negative unbiased exponent) truncates to 0.
  Value *ZeroResultCond = Builder.CreateICmpULT(
      LHS: BiasedExp, RHS: Builder.getIntN(N: FloatWidth, C: ExponentBias), Name: "exp.is.negative");
  if (IsSaturating) {
    // Saturating conversions map NaN (and, for unsigned, negative
    // inputs) to zero as well.
    Value *IsNaN = Builder.CreateFCmpUNO(LHS: FloatVal, RHS: FloatVal, Name: "is.nan");
    ZeroResultCond = Builder.CreateOr(LHS: ZeroResultCond, RHS: IsNaN);
    if (!IsSigned) {
      Value *IsNeg = Builder.CreateIsNeg(Arg: ARep);
      ZeroResultCond = Builder.CreateOr(LHS: ZeroResultCond, RHS: IsNeg);
    }
  }
  Builder.CreateCondBr(Cond: ZeroResultCond, True: End,
                       False: IsSaturating ? CheckSaturateBB : CheckExpSizeBB);

  Value *Saturated;
  if (IsSaturating) {
    // check.saturate: exponent too large for the target width?
    Builder.SetInsertPoint(CheckSaturateBB);
    Value *Cmp3 = Builder.CreateICmpUGE(
        LHS: BiasedExp, RHS: ConstantInt::getSigned(
                        Ty: FloatIntTy, V: static_cast<int64_t>(ExponentBias +
                                                             BitWidth - IsSigned)));
    Builder.CreateCondBr(Cond: Cmp3, True: SaturateBB, False: CheckExpSizeBB);

    // saturate: clamp to the extreme value of the target type.
    Builder.SetInsertPoint(SaturateBB);
    if (IsSigned) {
      Value *SignedMax =
          ConstantInt::get(Ty: IntTy, V: APInt::getSignedMaxValue(numBits: BitWidth));
      Value *SignedMin =
          ConstantInt::get(Ty: IntTy, V: APInt::getSignedMinValue(numBits: BitWidth));
      Saturated =
          Builder.CreateSelect(C: PosOrNeg, True: SignedMax, False: SignedMin, Name: "saturated");
    } else {
      Saturated = ConstantInt::getAllOnesValue(Ty: IntTy);
    }
    Builder.CreateBr(Dest: End);
  }

  // check.exp.size: does the value need a right shift (exponent smaller
  // than the mantissa width) or a left shift?
  Builder.SetInsertPoint(CheckExpSizeBB);
  Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
      LHS: BiasedExp, RHS: Builder.getIntN(N: FloatWidth, C: ExponentBias + FPMantissaWidth),
      Name: "exp.smaller.mantissa.width");
  Builder.CreateCondBr(Cond: ExpSmallerMantissaWidth, True: ExpSmallBB, False: ExpLargeBB);

  // exp.small: result = significand >> (mantissaWidth - unbiased exp).
  Builder.SetInsertPoint(ExpSmallBB);
  Value *Sub13 = Builder.CreateSub(
      LHS: Builder.getIntN(N: FloatWidth, C: ExponentBias + FPMantissaWidth), RHS: BiasedExp);
  Value *ExpSmallRes =
      Builder.CreateZExtOrTrunc(V: Builder.CreateLShr(LHS: Significand, RHS: Sub13), DestTy: IntTy);
  if (IsSigned)
    ExpSmallRes = Builder.CreateMul(LHS: ExpSmallRes, RHS: Sign);
  Builder.CreateBr(Dest: End);

  // exp.large: result = significand << (unbiased exp - mantissaWidth).
  Builder.SetInsertPoint(ExpLargeBB);
  Value *Sub15 = Builder.CreateAdd(
      LHS: BiasedExp,
      RHS: ConstantInt::getSigned(
          Ty: FloatIntTy, V: -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
  Value *SignificandCast = Builder.CreateZExtOrTrunc(V: Significand, DestTy: IntTy);
  Value *ExpLargeRes = Builder.CreateShl(
      LHS: SignificandCast, RHS: Builder.CreateZExtOrTrunc(V: Sub15, DestTy: IntTy));
  if (IsSigned)
    ExpLargeRes = Builder.CreateMul(LHS: ExpLargeRes, RHS: Sign);
  Builder.CreateBr(Dest: End);

  // cleanup: merge the results of all paths.
  Builder.SetInsertPoint(TheBB: End, IP: End->begin());
  PHINode *Retval0 = Builder.CreatePHI(Ty: FPToI->getType(), NumReservedValues: 3 + IsSaturating);

  if (IsSaturating)
    Retval0->addIncoming(V: Saturated, BB: SaturateBB);
  Retval0->addIncoming(V: ExpSmallRes, BB: ExpSmallBB);
  Retval0->addIncoming(V: ExpLargeRes, BB: ExpLargeBB);
  Retval0->addIncoming(V: Builder.getIntN(N: BitWidth, C: 0), BB: Entry);

  FPToI->replaceAllUsesWith(V: Retval0);
  FPToI->dropAllReferences();
  FPToI->eraseFromParent();
}
665
666// clang-format off: preserve formatting of the following example
667
/// Generate code to convert an integer to a fp number, replacing S(U)IToFP
/// with the generated code. This currently generates code similarly to
/// compiler-rt's implementations. This implementation has an implicit
/// assumption that the integer width is larger than the fp width.
672///
673/// An example IR generated from compiler-rt/floatdisf.c looks like below:
674/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
675/// entry:
676/// %cmp = icmp eq i64 %a, 0
677/// br i1 %cmp, label %return, label %if.end
678///
679/// if.end: ; preds = %entry
680/// %shr = ashr i64 %a, 63
681/// %xor = xor i64 %shr, %a
682/// %sub = sub nsw i64 %xor, %shr
683/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
684/// %cast = trunc i64 %0 to i32
685/// %sub1 = sub nuw nsw i32 64, %cast
686/// %sub2 = xor i32 %cast, 63
687/// %cmp3 = icmp ult i32 %cast, 40
688/// br i1 %cmp3, label %if.then4, label %if.else
689///
690/// if.then4: ; preds = %if.end
691/// switch i32 %sub1, label %sw.default [
692/// i32 25, label %sw.bb
693/// i32 26, label %sw.epilog
694/// ]
695///
696/// sw.bb: ; preds = %if.then4
697/// %shl = shl i64 %sub, 1
698/// br label %sw.epilog
699///
700/// sw.default: ; preds = %if.then4
701/// %sub5 = sub nsw i64 38, %0
702/// %sh_prom = and i64 %sub5, 4294967295
703/// %shr6 = lshr i64 %sub, %sh_prom
704/// %shr9 = lshr i64 274877906943, %0
705/// %and = and i64 %shr9, %sub
706/// %cmp10 = icmp ne i64 %and, 0
707/// %conv11 = zext i1 %cmp10 to i64
708/// %or = or i64 %shr6, %conv11
709/// br label %sw.epilog
710///
711/// sw.epilog: ; preds = %sw.default,
712/// %if.then4, %sw.bb
713/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl,
714/// %sw.bb ] %1 = lshr i64 %a.addr.0, 2 %2 = and i64 %1, 1 %or16 = or i64 %2,
715/// %a.addr.0 %inc = add nsw i64 %or16, 1 %3 = and i64 %inc, 67108864
716/// %tobool.not = icmp eq i64 %3, 0
717/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
718/// %spec.select = ashr i64 %inc, %spec.select.v
719/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
720/// br label %if.end26
721///
722/// if.else: ; preds = %if.end
723/// %sub23 = add nuw nsw i64 %0, 4294967256
724/// %sh_prom24 = and i64 %sub23, 4294967295
725/// %shl25 = shl i64 %sub, %sh_prom24
726/// br label %if.end26
727///
728/// if.end26: ; preds = %sw.epilog,
729/// %if.else
730/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
731/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
732/// %conv27 = trunc i64 %shr to i32
733/// %and28 = and i32 %conv27, -2147483648
734/// %add = shl nuw nsw i32 %e.0, 23
735/// %shl29 = add nuw nsw i32 %add, 1065353216
736/// %conv31 = trunc i64 %a.addr.1 to i32
737/// %and32 = and i32 %conv31, 8388607
738/// %or30 = or i32 %and32, %and28
739/// %or33 = or i32 %or30, %shl29
740/// %4 = bitcast i32 %or33 to float
741/// br label %return
742///
743/// return: ; preds = %entry,
744/// %if.end26
745/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
746/// ret float %retval.0
747/// }
748///
749/// Replace integer to fp with generated code.
750static void expandIToFP(Instruction *IToFP) {
751 // clang-format on
752 IRBuilder<> Builder(IToFP);
753 auto *IntVal = IToFP->getOperand(i: 0);
754 IntegerType *IntTy = cast<IntegerType>(Val: IntVal->getType());
755
756 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
757 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
// fp80 conversion is implemented by conversion to fp128 first, followed
// by a fptrunc to fp80.
760 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
// FIXME: As there are no related builtins added in compiler-rt,
// the fp32 <-> fp16 lib calls are currently used to implement this.
763 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
764 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
765 unsigned FloatWidth = PowerOf2Ceil(A: FPMantissaWidth);
766 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
767
768 // We're going to introduce branches on the value, so freeze it.
769 if (!isGuaranteedNotToBeUndefOrPoison(V: IntVal))
770 IntVal = Builder.CreateFreeze(V: IntVal);
771
772 // The expansion below assumes that int width >= float width. Zero or sign
773 // extend the integer accordingly.
774 if (BitWidth < FloatWidth) {
775 BitWidth = FloatWidth;
776 IntTy = Builder.getIntNTy(N: BitWidth);
777 IntVal = Builder.CreateIntCast(V: IntVal, DestTy: IntTy, isSigned: IsSigned);
778 }
779
780 Value *Temp1 =
781 Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: 1),
782 RHS: Builder.getIntN(N: BitWidth, C: FPMantissaWidth + 3));
783
784 BasicBlock *Entry = Builder.GetInsertBlock();
785 Function *F = Entry->getParent();
786 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
787 BasicBlock *End =
788 Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "itofp-return");
789 BasicBlock *IfEnd =
790 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end", Parent: F, InsertBefore: End);
791 BasicBlock *IfThen4 =
792 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then4", Parent: F, InsertBefore: End);
793 BasicBlock *SwBB =
794 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-bb", Parent: F, InsertBefore: End);
795 BasicBlock *SwDefault =
796 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-default", Parent: F, InsertBefore: End);
797 BasicBlock *SwEpilog =
798 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-epilog", Parent: F, InsertBefore: End);
799 BasicBlock *IfThen20 =
800 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then20", Parent: F, InsertBefore: End);
801 BasicBlock *IfElse =
802 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-else", Parent: F, InsertBefore: End);
803 BasicBlock *IfEnd26 =
804 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end26", Parent: F, InsertBefore: End);
805
806 Entry->getTerminator()->eraseFromParent();
807
808 Function *CTLZ =
809 Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: Intrinsic::ctlz, Tys: IntTy);
810 ConstantInt *True = Builder.getTrue();
811
812 // entry:
813 Builder.SetInsertPoint(Entry);
814 Value *Cmp = Builder.CreateICmpEQ(LHS: IntVal, RHS: ConstantInt::getSigned(Ty: IntTy, V: 0));
815 Builder.CreateCondBr(Cond: Cmp, True: End, False: IfEnd);
816
817 // if.end:
818 Builder.SetInsertPoint(IfEnd);
819 Value *Shr =
820 Builder.CreateAShr(LHS: IntVal, RHS: Builder.getIntN(N: BitWidth, C: BitWidth - 1));
821 Value *Xor = Builder.CreateXor(LHS: Shr, RHS: IntVal);
822 Value *Sub = Builder.CreateSub(LHS: Xor, RHS: Shr);
823 Value *Call = Builder.CreateCall(Callee: CTLZ, Args: {IsSigned ? Sub : IntVal, True});
824 Value *Cast = Builder.CreateTrunc(V: Call, DestTy: Builder.getInt32Ty());
825 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
826 Value *Sub1 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth),
827 RHS: FloatWidth == 128 ? Call : Cast);
828 Value *Sub2 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - 1),
829 RHS: FloatWidth == 128 ? Call : Cast);
830 Value *Cmp3 = Builder.CreateICmpSGT(
831 LHS: Sub1, RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 1));
832 Builder.CreateCondBr(Cond: Cmp3, True: IfThen4, False: IfElse);
833
834 // if.then4:
835 Builder.SetInsertPoint(IfThen4);
836 llvm::SwitchInst *SI = Builder.CreateSwitch(V: Sub1, Dest: SwDefault);
837 SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 2), Dest: SwBB);
838 SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 3), Dest: SwEpilog);
839
840 // sw.bb:
841 Builder.SetInsertPoint(SwBB);
842 Value *Shl =
843 Builder.CreateShl(LHS: IsSigned ? Sub : IntVal, RHS: Builder.getIntN(N: BitWidth, C: 1));
844 Builder.CreateBr(Dest: SwEpilog);
845
846 // sw.default:
847 Builder.SetInsertPoint(SwDefault);
848 Value *Sub5 = Builder.CreateSub(
849 LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - FPMantissaWidth - 3),
850 RHS: FloatWidth == 128 ? Call : Cast);
851 Value *ShProm = Builder.CreateZExt(V: Sub5, DestTy: IntTy);
852 Value *Shr6 = Builder.CreateLShr(LHS: IsSigned ? Sub : IntVal,
853 RHS: FloatWidth == 128 ? Sub5 : ShProm);
854 Value *Sub8 =
855 Builder.CreateAdd(LHS: FloatWidth == 128 ? Call : Cast,
856 RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 3));
857 Value *ShProm9 = Builder.CreateZExt(V: Sub8, DestTy: IntTy);
858 Value *Shr9 = Builder.CreateLShr(LHS: ConstantInt::getSigned(Ty: IntTy, V: -1),
859 RHS: FloatWidth == 128 ? Sub8 : ShProm9);
860 Value *And = Builder.CreateAnd(LHS: Shr9, RHS: IsSigned ? Sub : IntVal);
861 Value *Cmp10 = Builder.CreateICmpNE(LHS: And, RHS: Builder.getIntN(N: BitWidth, C: 0));
862 Value *Conv11 = Builder.CreateZExt(V: Cmp10, DestTy: IntTy);
863 Value *Or = Builder.CreateOr(LHS: Shr6, RHS: Conv11);
864 Builder.CreateBr(Dest: SwEpilog);
865
866 // sw.epilog:
867 Builder.SetInsertPoint(SwEpilog);
868 PHINode *AAddr0 = Builder.CreatePHI(Ty: IntTy, NumReservedValues: 3);
869 AAddr0->addIncoming(V: Or, BB: SwDefault);
870 AAddr0->addIncoming(V: IsSigned ? Sub : IntVal, BB: IfThen4);
871 AAddr0->addIncoming(V: Shl, BB: SwBB);
872 Value *A0 = Builder.CreateTrunc(V: AAddr0, DestTy: Builder.getInt32Ty());
873 Value *A1 = Builder.CreateLShr(LHS: A0, RHS: Builder.getInt32(C: 2));
874 Value *A2 = Builder.CreateAnd(LHS: A1, RHS: Builder.getInt32(C: 1));
875 Value *Conv16 = Builder.CreateZExt(V: A2, DestTy: IntTy);
876 Value *Or17 = Builder.CreateOr(LHS: AAddr0, RHS: Conv16);
877 Value *Inc = Builder.CreateAdd(LHS: Or17, RHS: Builder.getIntN(N: BitWidth, C: 1));
878 Value *Shr18 = nullptr;
879 if (IsSigned)
880 Shr18 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 2));
881 else
882 Shr18 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 2));
883 Value *A3 = Builder.CreateAnd(LHS: Inc, RHS: Temp1, Name: "a3");
884 Value *PosOrNeg = Builder.CreateICmpEQ(LHS: A3, RHS: Builder.getIntN(N: BitWidth, C: 0));
885 Value *ExtractT60 = Builder.CreateTrunc(V: Shr18, DestTy: Builder.getIntNTy(N: FloatWidth));
886 Value *Extract63 = Builder.CreateLShr(LHS: Shr18, RHS: Builder.getIntN(N: BitWidth, C: 32));
887 Value *ExtractT64 = nullptr;
888 if (FloatWidth > 80)
889 ExtractT64 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getInt64Ty());
890 else
891 ExtractT64 = Builder.CreateTrunc(V: Extract63, DestTy: Builder.getInt32Ty());
892 Builder.CreateCondBr(Cond: PosOrNeg, True: IfEnd26, False: IfThen20);
893
894 // if.then20
895 Builder.SetInsertPoint(IfThen20);
896 Value *Shr21 = nullptr;
897 if (IsSigned)
898 Shr21 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 3));
899 else
900 Shr21 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 3));
901 Value *ExtractT = Builder.CreateTrunc(V: Shr21, DestTy: Builder.getIntNTy(N: FloatWidth));
902 Value *Extract = Builder.CreateLShr(LHS: Shr21, RHS: Builder.getIntN(N: BitWidth, C: 32));
903 Value *ExtractT62 = nullptr;
904 if (FloatWidth > 80)
905 ExtractT62 = Builder.CreateTrunc(V: Sub1, DestTy: Builder.getInt64Ty());
906 else
907 ExtractT62 = Builder.CreateTrunc(V: Extract, DestTy: Builder.getInt32Ty());
908 Builder.CreateBr(Dest: IfEnd26);
909
910 // if.else:
911 Builder.SetInsertPoint(IfElse);
912 Value *Sub24 = Builder.CreateAdd(
913 LHS: FloatWidth == 128 ? Call : Cast,
914 RHS: ConstantInt::getSigned(Ty: Builder.getIntNTy(N: BitWidthNew),
915 V: -(int)(BitWidth - FPMantissaWidth - 1)));
916 Value *ShProm25 = Builder.CreateZExt(V: Sub24, DestTy: IntTy);
917 Value *Shl26 = Builder.CreateShl(LHS: IsSigned ? Sub : IntVal,
918 RHS: FloatWidth == 128 ? Sub24 : ShProm25);
919 Value *ExtractT61 = Builder.CreateTrunc(V: Shl26, DestTy: Builder.getIntNTy(N: FloatWidth));
920 Value *Extract65 = Builder.CreateLShr(LHS: Shl26, RHS: Builder.getIntN(N: BitWidth, C: 32));
921 Value *ExtractT66 = nullptr;
922 if (FloatWidth > 80)
923 ExtractT66 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getInt64Ty());
924 else
925 ExtractT66 = Builder.CreateTrunc(V: Extract65, DestTy: Builder.getInt32Ty());
926 Builder.CreateBr(Dest: IfEnd26);
927
928 // if.end26:
929 Builder.SetInsertPoint(IfEnd26);
930 PHINode *AAddr1Off0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth), NumReservedValues: 3);
931 AAddr1Off0->addIncoming(V: ExtractT, BB: IfThen20);
932 AAddr1Off0->addIncoming(V: ExtractT60, BB: SwEpilog);
933 AAddr1Off0->addIncoming(V: ExtractT61, BB: IfElse);
934 PHINode *AAddr1Off32 = nullptr;
935 if (FloatWidth > 32) {
936 AAddr1Off32 =
937 Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth > 80 ? 64 : 32), NumReservedValues: 3);
938 AAddr1Off32->addIncoming(V: ExtractT62, BB: IfThen20);
939 AAddr1Off32->addIncoming(V: ExtractT64, BB: SwEpilog);
940 AAddr1Off32->addIncoming(V: ExtractT66, BB: IfElse);
941 }
942 PHINode *E0 = nullptr;
943 if (FloatWidth <= 80) {
944 E0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: BitWidthNew), NumReservedValues: 3);
945 E0->addIncoming(V: Sub1, BB: IfThen20);
946 E0->addIncoming(V: Sub2, BB: SwEpilog);
947 E0->addIncoming(V: Sub2, BB: IfElse);
948 }
949 Value *And29 = nullptr;
950 if (FloatWidth > 80) {
951 Value *Temp2 = Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: 1),
952 RHS: Builder.getIntN(N: BitWidth, C: 63));
953 And29 = Builder.CreateAnd(LHS: Shr, RHS: Temp2, Name: "and29");
954 } else {
955 Value *Conv28 = Builder.CreateTrunc(V: Shr, DestTy: Builder.getInt32Ty());
956 And29 = Builder.CreateAnd(
957 LHS: Conv28, RHS: ConstantInt::get(Context&: Builder.getContext(), V: APInt::getSignMask(BitWidth: 32)));
958 }
959 unsigned TempMod = FPMantissaWidth % 32;
960 Value *And34 = nullptr;
961 Value *Shl30 = nullptr;
962 if (FloatWidth > 80) {
963 TempMod += 32;
964 Value *Add = Builder.CreateShl(LHS: AAddr1Off32, RHS: Builder.getInt64(C: TempMod));
965 Shl30 = Builder.CreateAdd(
966 LHS: Add, RHS: Builder.getInt64(C: ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
967 And34 = Builder.CreateZExt(V: Shl30, DestTy: Builder.getInt128Ty());
968 } else {
969 Value *Add = Builder.CreateShl(LHS: E0, RHS: Builder.getInt32(C: TempMod));
970 Shl30 = Builder.CreateAdd(
971 LHS: Add, RHS: Builder.getInt32(C: ((1 << (30 - TempMod)) - 1) << TempMod));
972 And34 = Builder.CreateAnd(LHS: FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
973 RHS: Builder.getInt32(C: (1 << TempMod) - 1));
974 }
975 Value *Or35 = nullptr;
976 if (FloatWidth > 80) {
977 Value *And29Trunc = Builder.CreateTrunc(V: And29, DestTy: Builder.getInt128Ty());
978 Value *Or31 = Builder.CreateOr(LHS: And29Trunc, RHS: And34);
979 Value *Or34 = Builder.CreateShl(LHS: Or31, RHS: Builder.getIntN(N: 128, C: 64));
980 Value *Temp3 = Builder.CreateShl(LHS: Builder.getIntN(N: 128, C: 1),
981 RHS: Builder.getIntN(N: 128, C: FPMantissaWidth));
982 Value *Temp4 = Builder.CreateSub(LHS: Temp3, RHS: Builder.getIntN(N: 128, C: 1));
983 Value *A6 = Builder.CreateAnd(LHS: AAddr1Off0, RHS: Temp4);
984 Or35 = Builder.CreateOr(LHS: Or34, RHS: A6);
985 } else {
986 Value *Or31 = Builder.CreateOr(LHS: And34, RHS: And29);
987 Or35 = Builder.CreateOr(LHS: IsSigned ? Or31 : And34, RHS: Shl30);
988 }
989 Value *A4 = nullptr;
990 if (IToFP->getType()->isDoubleTy()) {
991 Value *ZExt1 = Builder.CreateZExt(V: Or35, DestTy: Builder.getIntNTy(N: FloatWidth));
992 Value *Shl1 = Builder.CreateShl(LHS: ZExt1, RHS: Builder.getIntN(N: FloatWidth, C: 32));
993 Value *And1 =
994 Builder.CreateAnd(LHS: AAddr1Off0, RHS: Builder.getIntN(N: FloatWidth, C: 0xFFFFFFFF));
995 Value *Or1 = Builder.CreateOr(LHS: Shl1, RHS: And1);
996 A4 = Builder.CreateBitCast(V: Or1, DestTy: IToFP->getType());
997 } else if (IToFP->getType()->isX86_FP80Ty()) {
998 Value *A40 =
999 Builder.CreateBitCast(V: Or35, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
1000 A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
1001 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
1002 // Deal with "half" situation. This is a workaround since we don't have
1003 // floattihf.c currently as referring.
1004 Value *A40 =
1005 Builder.CreateBitCast(V: Or35, DestTy: Type::getFloatTy(C&: Builder.getContext()));
1006 A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
1007 } else // float type
1008 A4 = Builder.CreateBitCast(V: Or35, DestTy: IToFP->getType());
1009 Builder.CreateBr(Dest: End);
1010
1011 // return:
1012 Builder.SetInsertPoint(TheBB: End, IP: End->begin());
1013 PHINode *Retval0 = Builder.CreatePHI(Ty: IToFP->getType(), NumReservedValues: 2);
1014 Retval0->addIncoming(V: A4, BB: IfEnd26);
1015 Retval0->addIncoming(V: ConstantFP::getZero(Ty: IToFP->getType(), Negative: false), BB: Entry);
1016
1017 IToFP->replaceAllUsesWith(V: Retval0);
1018 IToFP->dropAllReferences();
1019 IToFP->eraseFromParent();
1020}
1021
1022static void scalarize(Instruction *I,
1023 SmallVectorImpl<Instruction *> &Worklist) {
1024 VectorType *VTy = cast<FixedVectorType>(Val: I->getType());
1025
1026 IRBuilder<> Builder(I);
1027
1028 unsigned NumElements = VTy->getElementCount().getFixedValue();
1029 Value *Result = PoisonValue::get(T: VTy);
1030 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
1031 Value *Ext = Builder.CreateExtractElement(Vec: I->getOperand(i: 0), Idx);
1032
1033 Value *NewOp = nullptr;
1034 if (auto *BinOp = dyn_cast<BinaryOperator>(Val: I))
1035 NewOp = Builder.CreateBinOp(
1036 Opc: BinOp->getOpcode(), LHS: Ext,
1037 RHS: Builder.CreateExtractElement(Vec: I->getOperand(i: 1), Idx));
1038 else if (auto *CastI = dyn_cast<CastInst>(Val: I))
1039 NewOp = Builder.CreateCast(Op: CastI->getOpcode(), V: Ext,
1040 DestTy: I->getType()->getScalarType());
1041 else
1042 llvm_unreachable("Unsupported instruction type");
1043
1044 Result = Builder.CreateInsertElement(Vec: Result, NewElt: NewOp, Idx);
1045 if (auto *ScalarizedI = dyn_cast<Instruction>(Val: NewOp)) {
1046 ScalarizedI->copyIRFlags(V: I, IncludeWrapFlags: true);
1047 Worklist.push_back(Elt: ScalarizedI);
1048 }
1049 }
1050
1051 I->replaceAllUsesWith(V: Result);
1052 I->dropAllReferences();
1053 I->eraseFromParent();
1054}
1055
1056static void addToWorklist(Instruction &I,
1057 SmallVector<Instruction *, 4> &Worklist) {
1058 if (I.getOperand(i: 0)->getType()->isVectorTy())
1059 scalarize(I: &I, Worklist);
1060 else
1061 Worklist.push_back(Elt: &I);
1062}
1063
/// Walk \p F, collect instructions that must be expanded given the
/// target's limits (fp<->int conversion width, div/rem width, frem
/// legalization), and expand each one. Returns true if the function was
/// modified.
///
/// NOTE(review): \p Libcalls is not referenced anywhere in this body —
/// presumably it is threaded through for libcall-based expansions
/// (e.g. frem); confirm whether it should be forwarded to expandFRem.
static bool runImpl(Function &F, const TargetLowering &TLI,
                    const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) {
  SmallVector<Instruction *, 4> Worklist;

  // Command-line overrides (presumably cl::opts defined earlier in this
  // file) take precedence over the target-reported limits.
  unsigned MaxLegalFpConvertBitWidth =
      TLI.getMaxLargeFPConvertBitWidthSupported();
  if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
    MaxLegalFpConvertBitWidth = ExpandFpConvertBits;

  unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
  if (ExpandDivRemBits != llvm::IntegerType::MAX_INT_BITS)
    MaxLegalDivRemBitWidth = ExpandDivRemBits;

  // A limit at MAX_INT_BITS means no IR type can exceed it, so that
  // category of expansion is effectively disabled.
  bool DisableExpandLargeFp =
      MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
  bool DisableExpandLargeDivRem =
      MaxLegalDivRemBitWidth >= llvm::IntegerType::MAX_INT_BITS;
  bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);

  // Early exit when nothing can possibly need expansion.
  if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
    return false;

  // Predicate: does this instruction exceed the target's legal widths?
  auto ShouldHandleInst = [&](Instruction &I) {
    Type *Ty = I.getType();
    // TODO: This pass doesn't handle scalable vectors.
    if (Ty->isScalableTy())
      return false;

    switch (I.getOpcode()) {
    case Instruction::FRem:
      return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
    case Instruction::FPToUI:
    case Instruction::FPToSI:
      // Width check is on the integer result type.
      return !DisableExpandLargeFp &&
             cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
                 MaxLegalFpConvertBitWidth;
    case Instruction::UIToFP:
    case Instruction::SIToFP:
      // Width check is on the integer source operand.
      return !DisableExpandLargeFp &&
             cast<IntegerType>(Val: I.getOperand(i: 0)->getType()->getScalarType())
                     ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      return !DisableExpandLargeDivRem &&
             cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
                 MaxLegalDivRemBitWidth
             // The backend has peephole optimizations for powers of two.
             // TODO: We don't consider vectors here.
             && !isConstantPowerOfTwo(V: I.getOperand(i: 1), SignedOp: isSigned(Opcode: I.getOpcode()));
    case Instruction::Call: {
      // Saturating fp-to-int intrinsics follow the same width rule as the
      // plain conversion instructions.
      auto *II = dyn_cast<IntrinsicInst>(Val: &I);
      if (II && (II->getIntrinsicID() == Intrinsic::fptoui_sat ||
                 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
        return !DisableExpandLargeFp &&
               cast<IntegerType>(Val: Ty->getScalarType())->getIntegerBitWidth() >
                   MaxLegalFpConvertBitWidth;
      }
      return false;
    }
    }

    return false;
  };

  // Collect first: expansion rewrites the CFG, so we must not expand while
  // iterating over the function's instructions.
  bool Modified = false;
  for (auto It = inst_begin(F: &F), End = inst_end(F); It != End;) {
    Instruction &I = *It++;
    if (!ShouldHandleInst(I))
      continue;

    addToWorklist(I, Worklist);
    Modified = true;
  }

  // Expand everything that was collected (including scalars queued by
  // scalarization).
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();

    switch (I->getOpcode()) {
    case Instruction::FRem: {
      // Build a SimplifyQuery only when an AssumptionCache is available.
      auto SQ = [&]() -> std::optional<SimplifyQuery> {
        if (AC) {
          auto Res = std::make_optional<SimplifyQuery>(
              args: I->getModule()->getDataLayout(), args&: I);
          Res->AC = AC;
          return Res;
        }
        return {};
      }();

      expandFRem(I&: cast<BinaryOperator>(Val&: *I), SQ);
      break;
    }

    case Instruction::FPToUI:
      expandFPToI(FPToI: I, /*IsSaturating=*/false, /*IsSigned=*/false);
      break;
    case Instruction::FPToSI:
      expandFPToI(FPToI: I, /*IsSaturating=*/false, /*IsSigned=*/true);
      break;

    case Instruction::UIToFP:
    case Instruction::SIToFP:
      expandIToFP(IToFP: I);
      break;

    case Instruction::UDiv:
    case Instruction::SDiv:
      expandDivision(Div: cast<BinaryOperator>(Val: I));
      break;
    case Instruction::URem:
    case Instruction::SRem:
      expandRemainder(Rem: cast<BinaryOperator>(Val: I));
      break;

    case Instruction::Call: {
      // Only the saturating fp-to-int intrinsics reach the worklist.
      auto *II = cast<IntrinsicInst>(Val: I);
      assert(II->getIntrinsicID() == Intrinsic::fptoui_sat ||
             II->getIntrinsicID() == Intrinsic::fptosi_sat);
      expandFPToI(FPToI: I, /*IsSaturating=*/true,
                  /*IsSigned=*/II->getIntrinsicID() == Intrinsic::fptosi_sat);
      break;
    }
    }
  }

  return Modified;
}
1193
1194namespace {
1195class ExpandIRInstsLegacyPass : public FunctionPass {
1196 CodeGenOptLevel OptLevel;
1197
1198public:
1199 static char ID;
1200
1201 ExpandIRInstsLegacyPass(CodeGenOptLevel OptLevel)
1202 : FunctionPass(ID), OptLevel(OptLevel) {}
1203
1204 ExpandIRInstsLegacyPass() : ExpandIRInstsLegacyPass(CodeGenOptLevel::None) {};
1205
1206 bool runOnFunction(Function &F) override {
1207 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1208 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);
1209 auto *TLI = Subtarget->getTargetLowering();
1210 AssumptionCache *AC = nullptr;
1211
1212 const LibcallLoweringInfo &Libcalls =
1213 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1214 M: *F.getParent(), Subtarget: *Subtarget);
1215
1216 if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
1217 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1218 return runImpl(F, TLI: *TLI, Libcalls, AC);
1219 }
1220
1221 void getAnalysisUsage(AnalysisUsage &AU) const override {
1222 AU.addRequired<LibcallLoweringInfoWrapper>();
1223 AU.addRequired<TargetPassConfig>();
1224 if (OptLevel != CodeGenOptLevel::None)
1225 AU.addRequired<AssumptionCacheTracker>();
1226 AU.addPreserved<AAResultsWrapperPass>();
1227 AU.addPreserved<GlobalsAAWrapperPass>();
1228 AU.addRequired<LibcallLoweringInfoWrapper>();
1229 }
1230};
1231} // namespace
1232
/// New pass-manager constructor. \p TM supplies the per-function subtarget
/// (and thus the expansion limits); \p OptLevel gates AssumptionCache use.
ExpandIRInstsPass::ExpandIRInstsPass(const TargetMachine &TM,
                                     CodeGenOptLevel OptLevel)
    : TM(&TM), OptLevel(OptLevel) {}
1236
1237void ExpandIRInstsPass::printPipeline(
1238 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1239 static_cast<PassInfoMixin<ExpandIRInstsPass> *>(this)->printPipeline(
1240 OS, MapClassName2PassName);
1241 OS << '<';
1242 OS << "O" << (int)OptLevel;
1243 OS << '>';
1244}
1245
/// New pass-manager entry point. Requires the module-level libcall
/// lowering analysis to already be cached; emits a context error and
/// bails out otherwise.
PreservedAnalyses ExpandIRInstsPass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
  auto &TLI = *STI->getTargetLowering();
  AssumptionCache *AC = nullptr;
  if (OptLevel != CodeGenOptLevel::None)
    AC = &FAM.getResult<AssumptionAnalysis>(IR&: F);

  // A function pass cannot compute a module analysis itself; it may only
  // read a cached result through the proxy.
  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);

  const LibcallLoweringModuleAnalysisResult *LibcallLowering =
      MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(IR&: *F.getParent());

  if (!LibcallLowering) {
    F.getContext().emitError(ErrorStr: "'" + LibcallLoweringModuleAnalysis::name() +
                             "' analysis required");
    return PreservedAnalyses::all();
  }

  const LibcallLoweringInfo &Libcalls =
      LibcallLowering->getLibcallLowering(Subtarget: *STI);

  return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none()
                                       : PreservedAnalyses::all();
}
1271
1272char ExpandIRInstsLegacyPass::ID = 0;
1273INITIALIZE_PASS_BEGIN(ExpandIRInstsLegacyPass, "expand-ir-insts",
1274 "Expand certain fp instructions", false, false)
1275INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
1276INITIALIZE_PASS_END(ExpandIRInstsLegacyPass, "expand-ir-insts",
1277 "Expand IR instructions", false, false)
1278
1279FunctionPass *llvm::createExpandIRInstsPass(CodeGenOptLevel OptLevel) {
1280 return new ExpandIRInstsLegacyPass(OptLevel);
1281}
1282