1//===--- ExpandFp.cpp - Expand fp instructions ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass expands certain floating point instructions at the IR level.
9//
10// It expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp
11// .. to’ instructions with a bitwidth above a threshold. This is
// useful for targets like x86_64 that cannot lower fp conversions
// with more than 128 bits.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/CodeGen/ExpandFp.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/Analysis/GlobalsModRef.h"
20#include "llvm/CodeGen/Passes.h"
21#include "llvm/CodeGen/TargetLowering.h"
22#include "llvm/CodeGen/TargetPassConfig.h"
23#include "llvm/CodeGen/TargetSubtargetInfo.h"
24#include "llvm/IR/IRBuilder.h"
25#include "llvm/IR/InstIterator.h"
26#include "llvm/IR/PassManager.h"
27#include "llvm/InitializePasses.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Target/TargetMachine.h"
31
32using namespace llvm;
33
34static cl::opt<unsigned>
35 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
36 cl::init(Val: llvm::IntegerType::MAX_INT_BITS),
37 cl::desc("fp convert instructions on integers with "
38 "more than <N> bits are expanded."));
39
40// clang-format off: preserve formatting of the following example
41
42/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
43/// the generated code. This currently generates code similarly to compiler-rt's
44/// implementations.
45///
46/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
47/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
48/// entry:
49/// %0 = bitcast float %a to i32
50/// %conv.i = zext i32 %0 to i64
51/// %tobool.not = icmp sgt i32 %0, -1
52/// %conv = select i1 %tobool.not, i64 1, i64 -1
53/// %and = lshr i64 %conv.i, 23
54/// %shr = and i64 %and, 255
55/// %and2 = and i64 %conv.i, 8388607
56/// %or = or i64 %and2, 8388608
57/// %cmp = icmp ult i64 %shr, 127
58/// br i1 %cmp, label %cleanup, label %if.end
59///
60/// if.end: ; preds = %entry
61/// %sub = add nuw nsw i64 %shr, 4294967169
62/// %conv5 = and i64 %sub, 4294967232
63/// %cmp6.not = icmp eq i64 %conv5, 0
64/// br i1 %cmp6.not, label %if.end12, label %if.then8
65///
66/// if.then8: ; preds = %if.end
67/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
68/// br label %cleanup
69///
70/// if.end12: ; preds = %if.end
71/// %cmp13 = icmp ult i64 %shr, 150
72/// br i1 %cmp13, label %if.then15, label %if.else
73///
74/// if.then15: ; preds = %if.end12
75/// %sub16 = sub nuw nsw i64 150, %shr
76/// %shr17 = lshr i64 %or, %sub16
77/// %mul = mul nsw i64 %shr17, %conv
78/// br label %cleanup
79///
80/// if.else: ; preds = %if.end12
81/// %sub18 = add nsw i64 %shr, -150
82/// %shl = shl i64 %or, %sub18
83/// %mul19 = mul nsw i64 %shl, %conv
84/// br label %cleanup
85///
86/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
87/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
88/// ret i64 %retval.0
89/// }
90///
91/// Replace fp to integer with generated code.
92static void expandFPToI(Instruction *FPToI) {
93 // clang-format on
94 IRBuilder<> Builder(FPToI);
95 auto *FloatVal = FPToI->getOperand(i: 0);
96 IntegerType *IntTy = cast<IntegerType>(Val: FPToI->getType());
97
98 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
99 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
100
101 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
102 // to i32 first following a sext/zext to target integer type.
103 Value *A1 = nullptr;
104 if (FloatVal->getType()->isHalfTy()) {
105 if (FPToI->getOpcode() == Instruction::FPToUI) {
106 Value *A0 = Builder.CreateFPToUI(V: FloatVal, DestTy: Builder.getIntNTy(N: 32));
107 A1 = Builder.CreateZExt(V: A0, DestTy: IntTy);
108 } else { // FPToSI
109 Value *A0 = Builder.CreateFPToSI(V: FloatVal, DestTy: Builder.getIntNTy(N: 32));
110 A1 = Builder.CreateSExt(V: A0, DestTy: IntTy);
111 }
112 FPToI->replaceAllUsesWith(V: A1);
113 FPToI->dropAllReferences();
114 FPToI->eraseFromParent();
115 return;
116 }
117
118 // fp80 conversion is implemented by fpext to fp128 first then do the
119 // conversion.
120 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
121 unsigned FloatWidth =
122 PowerOf2Ceil(A: FloatVal->getType()->getScalarSizeInBits());
123 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
124 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
125 Value *ImplicitBit = Builder.CreateShl(
126 LHS: Builder.getIntN(N: BitWidth, C: 1), RHS: Builder.getIntN(N: BitWidth, C: FPMantissaWidth));
127 Value *SignificandMask =
128 Builder.CreateSub(LHS: ImplicitBit, RHS: Builder.getIntN(N: BitWidth, C: 1));
129 Value *NegOne = Builder.CreateSExt(
130 V: ConstantInt::getSigned(Ty: Builder.getInt32Ty(), V: -1), DestTy: IntTy);
131 Value *NegInf =
132 Builder.CreateShl(LHS: ConstantInt::getSigned(Ty: IntTy, V: 1),
133 RHS: ConstantInt::getSigned(Ty: IntTy, V: BitWidth - 1));
134
135 BasicBlock *Entry = Builder.GetInsertBlock();
136 Function *F = Entry->getParent();
137 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
138 BasicBlock *End =
139 Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "fp-to-i-cleanup");
140 BasicBlock *IfEnd =
141 BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-end", Parent: F, InsertBefore: End);
142 BasicBlock *IfThen5 =
143 BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-then5", Parent: F, InsertBefore: End);
144 BasicBlock *IfEnd9 =
145 BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-end9", Parent: F, InsertBefore: End);
146 BasicBlock *IfThen12 =
147 BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-then12", Parent: F, InsertBefore: End);
148 BasicBlock *IfElse =
149 BasicBlock::Create(Context&: Builder.getContext(), Name: "fp-to-i-if-else", Parent: F, InsertBefore: End);
150
151 Entry->getTerminator()->eraseFromParent();
152
153 // entry:
154 Builder.SetInsertPoint(Entry);
155 Value *FloatVal0 = FloatVal;
156 // fp80 conversion is implemented by fpext to fp128 first then do the
157 // conversion.
158 if (FloatVal->getType()->isX86_FP80Ty())
159 FloatVal0 =
160 Builder.CreateFPExt(V: FloatVal, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
161 Value *ARep0 =
162 Builder.CreateBitCast(V: FloatVal0, DestTy: Builder.getIntNTy(N: FloatWidth));
163 Value *ARep = Builder.CreateZExt(V: ARep0, DestTy: FPToI->getType());
164 Value *PosOrNeg = Builder.CreateICmpSGT(
165 LHS: ARep0, RHS: ConstantInt::getSigned(Ty: Builder.getIntNTy(N: FloatWidth), V: -1));
166 Value *Sign = Builder.CreateSelect(C: PosOrNeg, True: ConstantInt::getSigned(Ty: IntTy, V: 1),
167 False: ConstantInt::getSigned(Ty: IntTy, V: -1));
168 Value *And =
169 Builder.CreateLShr(LHS: ARep, RHS: Builder.getIntN(N: BitWidth, C: FPMantissaWidth));
170 Value *And2 = Builder.CreateAnd(
171 LHS: And, RHS: Builder.getIntN(N: BitWidth, C: (1 << ExponentWidth) - 1));
172 Value *Abs = Builder.CreateAnd(LHS: ARep, RHS: SignificandMask);
173 Value *Or = Builder.CreateOr(LHS: Abs, RHS: ImplicitBit);
174 Value *Cmp =
175 Builder.CreateICmpULT(LHS: And2, RHS: Builder.getIntN(N: BitWidth, C: ExponentBias));
176 Builder.CreateCondBr(Cond: Cmp, True: End, False: IfEnd);
177
178 // if.end:
179 Builder.SetInsertPoint(IfEnd);
180 Value *Add1 = Builder.CreateAdd(
181 LHS: And2, RHS: ConstantInt::getSigned(
182 Ty: IntTy, V: -static_cast<int64_t>(ExponentBias + BitWidth)));
183 Value *Cmp3 = Builder.CreateICmpULT(
184 LHS: Add1, RHS: ConstantInt::getSigned(Ty: IntTy, V: -static_cast<int64_t>(BitWidth)));
185 Builder.CreateCondBr(Cond: Cmp3, True: IfThen5, False: IfEnd9);
186
187 // if.then5:
188 Builder.SetInsertPoint(IfThen5);
189 Value *PosInf = Builder.CreateXor(LHS: NegOne, RHS: NegInf);
190 Value *Cond8 = Builder.CreateSelect(C: PosOrNeg, True: PosInf, False: NegInf);
191 Builder.CreateBr(Dest: End);
192
193 // if.end9:
194 Builder.SetInsertPoint(IfEnd9);
195 Value *Cmp10 = Builder.CreateICmpULT(
196 LHS: And2, RHS: Builder.getIntN(N: BitWidth, C: ExponentBias + FPMantissaWidth));
197 Builder.CreateCondBr(Cond: Cmp10, True: IfThen12, False: IfElse);
198
199 // if.then12:
200 Builder.SetInsertPoint(IfThen12);
201 Value *Sub13 = Builder.CreateSub(
202 LHS: Builder.getIntN(N: BitWidth, C: ExponentBias + FPMantissaWidth), RHS: And2);
203 Value *Shr14 = Builder.CreateLShr(LHS: Or, RHS: Sub13);
204 Value *Mul = Builder.CreateMul(LHS: Shr14, RHS: Sign);
205 Builder.CreateBr(Dest: End);
206
207 // if.else:
208 Builder.SetInsertPoint(IfElse);
209 Value *Sub15 = Builder.CreateAdd(
210 LHS: And2, RHS: ConstantInt::getSigned(
211 Ty: IntTy, V: -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
212 Value *Shl = Builder.CreateShl(LHS: Or, RHS: Sub15);
213 Value *Mul16 = Builder.CreateMul(LHS: Shl, RHS: Sign);
214 Builder.CreateBr(Dest: End);
215
216 // cleanup:
217 Builder.SetInsertPoint(TheBB: End, IP: End->begin());
218 PHINode *Retval0 = Builder.CreatePHI(Ty: FPToI->getType(), NumReservedValues: 4);
219
220 Retval0->addIncoming(V: Cond8, BB: IfThen5);
221 Retval0->addIncoming(V: Mul, BB: IfThen12);
222 Retval0->addIncoming(V: Mul16, BB: IfElse);
223 Retval0->addIncoming(V: Builder.getIntN(N: BitWidth, C: 0), BB: Entry);
224
225 FPToI->replaceAllUsesWith(V: Retval0);
226 FPToI->dropAllReferences();
227 FPToI->eraseFromParent();
228}
229
230// clang-format off: preserve formatting of the following example
231
/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
/// the generated code. This currently generates code similarly to compiler-rt's
/// implementations. This implementation has an implicit assumption that integer
/// width is larger than fp.
236///
237/// An example IR generated from compiler-rt/floatdisf.c looks like below:
238/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
239/// entry:
240/// %cmp = icmp eq i64 %a, 0
241/// br i1 %cmp, label %return, label %if.end
242///
243/// if.end: ; preds = %entry
244/// %shr = ashr i64 %a, 63
245/// %xor = xor i64 %shr, %a
246/// %sub = sub nsw i64 %xor, %shr
247/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
248/// %cast = trunc i64 %0 to i32
249/// %sub1 = sub nuw nsw i32 64, %cast
250/// %sub2 = xor i32 %cast, 63
251/// %cmp3 = icmp ult i32 %cast, 40
252/// br i1 %cmp3, label %if.then4, label %if.else
253///
254/// if.then4: ; preds = %if.end
255/// switch i32 %sub1, label %sw.default [
256/// i32 25, label %sw.bb
257/// i32 26, label %sw.epilog
258/// ]
259///
260/// sw.bb: ; preds = %if.then4
261/// %shl = shl i64 %sub, 1
262/// br label %sw.epilog
263///
264/// sw.default: ; preds = %if.then4
265/// %sub5 = sub nsw i64 38, %0
266/// %sh_prom = and i64 %sub5, 4294967295
267/// %shr6 = lshr i64 %sub, %sh_prom
268/// %shr9 = lshr i64 274877906943, %0
269/// %and = and i64 %shr9, %sub
270/// %cmp10 = icmp ne i64 %and, 0
271/// %conv11 = zext i1 %cmp10 to i64
272/// %or = or i64 %shr6, %conv11
273/// br label %sw.epilog
274///
275/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
276/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
277/// %1 = lshr i64 %a.addr.0, 2
278/// %2 = and i64 %1, 1
279/// %or16 = or i64 %2, %a.addr.0
280/// %inc = add nsw i64 %or16, 1
281/// %3 = and i64 %inc, 67108864
282/// %tobool.not = icmp eq i64 %3, 0
283/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
284/// %spec.select = ashr i64 %inc, %spec.select.v
285/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
286/// br label %if.end26
287///
288/// if.else: ; preds = %if.end
289/// %sub23 = add nuw nsw i64 %0, 4294967256
290/// %sh_prom24 = and i64 %sub23, 4294967295
291/// %shl25 = shl i64 %sub, %sh_prom24
292/// br label %if.end26
293///
294/// if.end26: ; preds = %sw.epilog, %if.else
295/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
296/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
297/// %conv27 = trunc i64 %shr to i32
298/// %and28 = and i32 %conv27, -2147483648
299/// %add = shl nuw nsw i32 %e.0, 23
300/// %shl29 = add nuw nsw i32 %add, 1065353216
301/// %conv31 = trunc i64 %a.addr.1 to i32
302/// %and32 = and i32 %conv31, 8388607
303/// %or30 = or i32 %and32, %and28
304/// %or33 = or i32 %or30, %shl29
305/// %4 = bitcast i32 %or33 to float
306/// br label %return
307///
308/// return: ; preds = %entry, %if.end26
309/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
310/// ret float %retval.0
311/// }
312///
313/// Replace integer to fp with generated code.
314static void expandIToFP(Instruction *IToFP) {
315 // clang-format on
316 IRBuilder<> Builder(IToFP);
317 auto *IntVal = IToFP->getOperand(i: 0);
318 IntegerType *IntTy = cast<IntegerType>(Val: IntVal->getType());
319
320 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
321 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
322 // fp80 conversion is implemented by conversion tp fp128 first following
323 // a fptrunc to fp80.
324 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
325 // FIXME: As there is no related builtins added in compliler-rt,
326 // here currently utilized the fp32 <-> fp16 lib calls to implement.
327 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
328 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
329 unsigned FloatWidth = PowerOf2Ceil(A: FPMantissaWidth);
330 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
331
332 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
333 "assumes integer width is larger than fp.");
334
335 Value *Temp1 =
336 Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: 1),
337 RHS: Builder.getIntN(N: BitWidth, C: FPMantissaWidth + 3));
338
339 BasicBlock *Entry = Builder.GetInsertBlock();
340 Function *F = Entry->getParent();
341 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
342 BasicBlock *End =
343 Entry->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "itofp-return");
344 BasicBlock *IfEnd =
345 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end", Parent: F, InsertBefore: End);
346 BasicBlock *IfThen4 =
347 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then4", Parent: F, InsertBefore: End);
348 BasicBlock *SwBB =
349 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-bb", Parent: F, InsertBefore: End);
350 BasicBlock *SwDefault =
351 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-default", Parent: F, InsertBefore: End);
352 BasicBlock *SwEpilog =
353 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-sw-epilog", Parent: F, InsertBefore: End);
354 BasicBlock *IfThen20 =
355 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-then20", Parent: F, InsertBefore: End);
356 BasicBlock *IfElse =
357 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-else", Parent: F, InsertBefore: End);
358 BasicBlock *IfEnd26 =
359 BasicBlock::Create(Context&: Builder.getContext(), Name: "itofp-if-end26", Parent: F, InsertBefore: End);
360
361 Entry->getTerminator()->eraseFromParent();
362
363 Function *CTLZ =
364 Intrinsic::getOrInsertDeclaration(M: F->getParent(), id: Intrinsic::ctlz, Tys: IntTy);
365 ConstantInt *True = Builder.getTrue();
366
367 // entry:
368 Builder.SetInsertPoint(Entry);
369 Value *Cmp = Builder.CreateICmpEQ(LHS: IntVal, RHS: ConstantInt::getSigned(Ty: IntTy, V: 0));
370 Builder.CreateCondBr(Cond: Cmp, True: End, False: IfEnd);
371
372 // if.end:
373 Builder.SetInsertPoint(IfEnd);
374 Value *Shr =
375 Builder.CreateAShr(LHS: IntVal, RHS: Builder.getIntN(N: BitWidth, C: BitWidth - 1));
376 Value *Xor = Builder.CreateXor(LHS: Shr, RHS: IntVal);
377 Value *Sub = Builder.CreateSub(LHS: Xor, RHS: Shr);
378 Value *Call = Builder.CreateCall(Callee: CTLZ, Args: {IsSigned ? Sub : IntVal, True});
379 Value *Cast = Builder.CreateTrunc(V: Call, DestTy: Builder.getInt32Ty());
380 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
381 Value *Sub1 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth),
382 RHS: FloatWidth == 128 ? Call : Cast);
383 Value *Sub2 = Builder.CreateSub(LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - 1),
384 RHS: FloatWidth == 128 ? Call : Cast);
385 Value *Cmp3 = Builder.CreateICmpSGT(
386 LHS: Sub1, RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 1));
387 Builder.CreateCondBr(Cond: Cmp3, True: IfThen4, False: IfElse);
388
389 // if.then4:
390 Builder.SetInsertPoint(IfThen4);
391 llvm::SwitchInst *SI = Builder.CreateSwitch(V: Sub1, Dest: SwDefault);
392 SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 2), Dest: SwBB);
393 SI->addCase(OnVal: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 3), Dest: SwEpilog);
394
395 // sw.bb:
396 Builder.SetInsertPoint(SwBB);
397 Value *Shl =
398 Builder.CreateShl(LHS: IsSigned ? Sub : IntVal, RHS: Builder.getIntN(N: BitWidth, C: 1));
399 Builder.CreateBr(Dest: SwEpilog);
400
401 // sw.default:
402 Builder.SetInsertPoint(SwDefault);
403 Value *Sub5 = Builder.CreateSub(
404 LHS: Builder.getIntN(N: BitWidthNew, C: BitWidth - FPMantissaWidth - 3),
405 RHS: FloatWidth == 128 ? Call : Cast);
406 Value *ShProm = Builder.CreateZExt(V: Sub5, DestTy: IntTy);
407 Value *Shr6 = Builder.CreateLShr(LHS: IsSigned ? Sub : IntVal,
408 RHS: FloatWidth == 128 ? Sub5 : ShProm);
409 Value *Sub8 =
410 Builder.CreateAdd(LHS: FloatWidth == 128 ? Call : Cast,
411 RHS: Builder.getIntN(N: BitWidthNew, C: FPMantissaWidth + 3));
412 Value *ShProm9 = Builder.CreateZExt(V: Sub8, DestTy: IntTy);
413 Value *Shr9 = Builder.CreateLShr(LHS: ConstantInt::getSigned(Ty: IntTy, V: -1),
414 RHS: FloatWidth == 128 ? Sub8 : ShProm9);
415 Value *And = Builder.CreateAnd(LHS: Shr9, RHS: IsSigned ? Sub : IntVal);
416 Value *Cmp10 = Builder.CreateICmpNE(LHS: And, RHS: Builder.getIntN(N: BitWidth, C: 0));
417 Value *Conv11 = Builder.CreateZExt(V: Cmp10, DestTy: IntTy);
418 Value *Or = Builder.CreateOr(LHS: Shr6, RHS: Conv11);
419 Builder.CreateBr(Dest: SwEpilog);
420
421 // sw.epilog:
422 Builder.SetInsertPoint(SwEpilog);
423 PHINode *AAddr0 = Builder.CreatePHI(Ty: IntTy, NumReservedValues: 3);
424 AAddr0->addIncoming(V: Or, BB: SwDefault);
425 AAddr0->addIncoming(V: IsSigned ? Sub : IntVal, BB: IfThen4);
426 AAddr0->addIncoming(V: Shl, BB: SwBB);
427 Value *A0 = Builder.CreateTrunc(V: AAddr0, DestTy: Builder.getInt32Ty());
428 Value *A1 = Builder.CreateLShr(LHS: A0, RHS: Builder.getIntN(N: 32, C: 2));
429 Value *A2 = Builder.CreateAnd(LHS: A1, RHS: Builder.getIntN(N: 32, C: 1));
430 Value *Conv16 = Builder.CreateZExt(V: A2, DestTy: IntTy);
431 Value *Or17 = Builder.CreateOr(LHS: AAddr0, RHS: Conv16);
432 Value *Inc = Builder.CreateAdd(LHS: Or17, RHS: Builder.getIntN(N: BitWidth, C: 1));
433 Value *Shr18 = nullptr;
434 if (IsSigned)
435 Shr18 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 2));
436 else
437 Shr18 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 2));
438 Value *A3 = Builder.CreateAnd(LHS: Inc, RHS: Temp1, Name: "a3");
439 Value *PosOrNeg = Builder.CreateICmpEQ(LHS: A3, RHS: Builder.getIntN(N: BitWidth, C: 0));
440 Value *ExtractT60 = Builder.CreateTrunc(V: Shr18, DestTy: Builder.getIntNTy(N: FloatWidth));
441 Value *Extract63 = Builder.CreateLShr(LHS: Shr18, RHS: Builder.getIntN(N: BitWidth, C: 32));
442 Value *ExtractT64 = nullptr;
443 if (FloatWidth > 80)
444 ExtractT64 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getInt64Ty());
445 else
446 ExtractT64 = Builder.CreateTrunc(V: Extract63, DestTy: Builder.getInt32Ty());
447 Builder.CreateCondBr(Cond: PosOrNeg, True: IfEnd26, False: IfThen20);
448
449 // if.then20
450 Builder.SetInsertPoint(IfThen20);
451 Value *Shr21 = nullptr;
452 if (IsSigned)
453 Shr21 = Builder.CreateAShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 3));
454 else
455 Shr21 = Builder.CreateLShr(LHS: Inc, RHS: Builder.getIntN(N: BitWidth, C: 3));
456 Value *ExtractT = Builder.CreateTrunc(V: Shr21, DestTy: Builder.getIntNTy(N: FloatWidth));
457 Value *Extract = Builder.CreateLShr(LHS: Shr21, RHS: Builder.getIntN(N: BitWidth, C: 32));
458 Value *ExtractT62 = nullptr;
459 if (FloatWidth > 80)
460 ExtractT62 = Builder.CreateTrunc(V: Sub1, DestTy: Builder.getIntNTy(N: 64));
461 else
462 ExtractT62 = Builder.CreateTrunc(V: Extract, DestTy: Builder.getIntNTy(N: 32));
463 Builder.CreateBr(Dest: IfEnd26);
464
465 // if.else:
466 Builder.SetInsertPoint(IfElse);
467 Value *Sub24 = Builder.CreateAdd(
468 LHS: FloatWidth == 128 ? Call : Cast,
469 RHS: ConstantInt::getSigned(Ty: Builder.getIntNTy(N: BitWidthNew),
470 V: -(BitWidth - FPMantissaWidth - 1)));
471 Value *ShProm25 = Builder.CreateZExt(V: Sub24, DestTy: IntTy);
472 Value *Shl26 = Builder.CreateShl(LHS: IsSigned ? Sub : IntVal,
473 RHS: FloatWidth == 128 ? Sub24 : ShProm25);
474 Value *ExtractT61 = Builder.CreateTrunc(V: Shl26, DestTy: Builder.getIntNTy(N: FloatWidth));
475 Value *Extract65 = Builder.CreateLShr(LHS: Shl26, RHS: Builder.getIntN(N: BitWidth, C: 32));
476 Value *ExtractT66 = nullptr;
477 if (FloatWidth > 80)
478 ExtractT66 = Builder.CreateTrunc(V: Sub2, DestTy: Builder.getIntNTy(N: 64));
479 else
480 ExtractT66 = Builder.CreateTrunc(V: Extract65, DestTy: Builder.getInt32Ty());
481 Builder.CreateBr(Dest: IfEnd26);
482
483 // if.end26:
484 Builder.SetInsertPoint(IfEnd26);
485 PHINode *AAddr1Off0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth), NumReservedValues: 3);
486 AAddr1Off0->addIncoming(V: ExtractT, BB: IfThen20);
487 AAddr1Off0->addIncoming(V: ExtractT60, BB: SwEpilog);
488 AAddr1Off0->addIncoming(V: ExtractT61, BB: IfElse);
489 PHINode *AAddr1Off32 = nullptr;
490 if (FloatWidth > 32) {
491 AAddr1Off32 =
492 Builder.CreatePHI(Ty: Builder.getIntNTy(N: FloatWidth > 80 ? 64 : 32), NumReservedValues: 3);
493 AAddr1Off32->addIncoming(V: ExtractT62, BB: IfThen20);
494 AAddr1Off32->addIncoming(V: ExtractT64, BB: SwEpilog);
495 AAddr1Off32->addIncoming(V: ExtractT66, BB: IfElse);
496 }
497 PHINode *E0 = nullptr;
498 if (FloatWidth <= 80) {
499 E0 = Builder.CreatePHI(Ty: Builder.getIntNTy(N: BitWidthNew), NumReservedValues: 3);
500 E0->addIncoming(V: Sub1, BB: IfThen20);
501 E0->addIncoming(V: Sub2, BB: SwEpilog);
502 E0->addIncoming(V: Sub2, BB: IfElse);
503 }
504 Value *And29 = nullptr;
505 if (FloatWidth > 80) {
506 Value *Temp2 = Builder.CreateShl(LHS: Builder.getIntN(N: BitWidth, C: 1),
507 RHS: Builder.getIntN(N: BitWidth, C: 63));
508 And29 = Builder.CreateAnd(LHS: Shr, RHS: Temp2, Name: "and29");
509 } else {
510 Value *Conv28 = Builder.CreateTrunc(V: Shr, DestTy: Builder.getIntNTy(N: 32));
511 And29 = Builder.CreateAnd(
512 LHS: Conv28, RHS: ConstantInt::getSigned(Ty: Builder.getIntNTy(N: 32), V: 0x80000000));
513 }
514 unsigned TempMod = FPMantissaWidth % 32;
515 Value *And34 = nullptr;
516 Value *Shl30 = nullptr;
517 if (FloatWidth > 80) {
518 TempMod += 32;
519 Value *Add = Builder.CreateShl(LHS: AAddr1Off32, RHS: Builder.getIntN(N: 64, C: TempMod));
520 Shl30 = Builder.CreateAdd(
521 LHS: Add,
522 RHS: Builder.getIntN(N: 64, C: ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
523 And34 = Builder.CreateZExt(V: Shl30, DestTy: Builder.getIntNTy(N: 128));
524 } else {
525 Value *Add = Builder.CreateShl(LHS: E0, RHS: Builder.getIntN(N: 32, C: TempMod));
526 Shl30 = Builder.CreateAdd(
527 LHS: Add, RHS: Builder.getIntN(N: 32, C: ((1 << (30 - TempMod)) - 1) << TempMod));
528 And34 = Builder.CreateAnd(LHS: FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
529 RHS: Builder.getIntN(N: 32, C: (1 << TempMod) - 1));
530 }
531 Value *Or35 = nullptr;
532 if (FloatWidth > 80) {
533 Value *And29Trunc = Builder.CreateTrunc(V: And29, DestTy: Builder.getIntNTy(N: 128));
534 Value *Or31 = Builder.CreateOr(LHS: And29Trunc, RHS: And34);
535 Value *Or34 = Builder.CreateShl(LHS: Or31, RHS: Builder.getIntN(N: 128, C: 64));
536 Value *Temp3 = Builder.CreateShl(LHS: Builder.getIntN(N: 128, C: 1),
537 RHS: Builder.getIntN(N: 128, C: FPMantissaWidth));
538 Value *Temp4 = Builder.CreateSub(LHS: Temp3, RHS: Builder.getIntN(N: 128, C: 1));
539 Value *A6 = Builder.CreateAnd(LHS: AAddr1Off0, RHS: Temp4);
540 Or35 = Builder.CreateOr(LHS: Or34, RHS: A6);
541 } else {
542 Value *Or31 = Builder.CreateOr(LHS: And34, RHS: And29);
543 Or35 = Builder.CreateOr(LHS: IsSigned ? Or31 : And34, RHS: Shl30);
544 }
545 Value *A4 = nullptr;
546 if (IToFP->getType()->isDoubleTy()) {
547 Value *ZExt1 = Builder.CreateZExt(V: Or35, DestTy: Builder.getIntNTy(N: FloatWidth));
548 Value *Shl1 = Builder.CreateShl(LHS: ZExt1, RHS: Builder.getIntN(N: FloatWidth, C: 32));
549 Value *And1 =
550 Builder.CreateAnd(LHS: AAddr1Off0, RHS: Builder.getIntN(N: FloatWidth, C: 0xFFFFFFFF));
551 Value *Or1 = Builder.CreateOr(LHS: Shl1, RHS: And1);
552 A4 = Builder.CreateBitCast(V: Or1, DestTy: IToFP->getType());
553 } else if (IToFP->getType()->isX86_FP80Ty()) {
554 Value *A40 =
555 Builder.CreateBitCast(V: Or35, DestTy: Type::getFP128Ty(C&: Builder.getContext()));
556 A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
557 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
558 // Deal with "half" situation. This is a workaround since we don't have
559 // floattihf.c currently as referring.
560 Value *A40 =
561 Builder.CreateBitCast(V: Or35, DestTy: Type::getFloatTy(C&: Builder.getContext()));
562 A4 = Builder.CreateFPTrunc(V: A40, DestTy: IToFP->getType());
563 } else // float type
564 A4 = Builder.CreateBitCast(V: Or35, DestTy: IToFP->getType());
565 Builder.CreateBr(Dest: End);
566
567 // return:
568 Builder.SetInsertPoint(TheBB: End, IP: End->begin());
569 PHINode *Retval0 = Builder.CreatePHI(Ty: IToFP->getType(), NumReservedValues: 2);
570 Retval0->addIncoming(V: A4, BB: IfEnd26);
571 Retval0->addIncoming(V: ConstantFP::getZero(Ty: IToFP->getType(), Negative: false), BB: Entry);
572
573 IToFP->replaceAllUsesWith(V: Retval0);
574 IToFP->dropAllReferences();
575 IToFP->eraseFromParent();
576}
577
578static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
579 VectorType *VTy = cast<FixedVectorType>(Val: I->getType());
580
581 IRBuilder<> Builder(I);
582
583 unsigned NumElements = VTy->getElementCount().getFixedValue();
584 Value *Result = PoisonValue::get(T: VTy);
585 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
586 Value *Ext = Builder.CreateExtractElement(Vec: I->getOperand(i: 0), Idx);
587 Value *Cast = Builder.CreateCast(Op: cast<CastInst>(Val: I)->getOpcode(), V: Ext,
588 DestTy: I->getType()->getScalarType());
589 Result = Builder.CreateInsertElement(Vec: Result, NewElt: Cast, Idx);
590 if (isa<Instruction>(Val: Cast))
591 Replace.push_back(Elt: cast<Instruction>(Val: Cast));
592 }
593 I->replaceAllUsesWith(V: Result);
594 I->dropAllReferences();
595 I->eraseFromParent();
596}
597
598static bool runImpl(Function &F, const TargetLowering &TLI) {
599 SmallVector<Instruction *, 4> Replace;
600 SmallVector<Instruction *, 4> ReplaceVector;
601 bool Modified = false;
602
603 unsigned MaxLegalFpConvertBitWidth =
604 TLI.getMaxLargeFPConvertBitWidthSupported();
605 if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
606 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
607
608 if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
609 return false;
610
611 for (auto &I : instructions(F)) {
612 switch (I.getOpcode()) {
613 case Instruction::FPToUI:
614 case Instruction::FPToSI: {
615 // TODO: This pass doesn't handle scalable vectors.
616 if (I.getOperand(i: 0)->getType()->isScalableTy())
617 continue;
618
619 auto *IntTy = cast<IntegerType>(Val: I.getType()->getScalarType());
620 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
621 continue;
622
623 if (I.getOperand(i: 0)->getType()->isVectorTy())
624 ReplaceVector.push_back(Elt: &I);
625 else
626 Replace.push_back(Elt: &I);
627 Modified = true;
628 break;
629 }
630 case Instruction::UIToFP:
631 case Instruction::SIToFP: {
632 // TODO: This pass doesn't handle scalable vectors.
633 if (I.getOperand(i: 0)->getType()->isScalableTy())
634 continue;
635
636 auto *IntTy =
637 cast<IntegerType>(Val: I.getOperand(i: 0)->getType()->getScalarType());
638 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
639 continue;
640
641 if (I.getOperand(i: 0)->getType()->isVectorTy())
642 ReplaceVector.push_back(Elt: &I);
643 else
644 Replace.push_back(Elt: &I);
645 Modified = true;
646 break;
647 }
648 default:
649 break;
650 }
651 }
652
653 while (!ReplaceVector.empty()) {
654 Instruction *I = ReplaceVector.pop_back_val();
655 scalarize(I, Replace);
656 }
657
658 if (Replace.empty())
659 return false;
660
661 while (!Replace.empty()) {
662 Instruction *I = Replace.pop_back_val();
663 if (I->getOpcode() == Instruction::FPToUI ||
664 I->getOpcode() == Instruction::FPToSI) {
665 expandFPToI(FPToI: I);
666 } else {
667 expandIToFP(IToFP: I);
668 }
669 }
670
671 return Modified;
672}
673
674namespace {
675class ExpandFpLegacyPass : public FunctionPass {
676public:
677 static char ID;
678
679 ExpandFpLegacyPass() : FunctionPass(ID) {
680 initializeExpandFpLegacyPassPass(*PassRegistry::getPassRegistry());
681 }
682
683 bool runOnFunction(Function &F) override {
684 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
685 auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
686 return runImpl(F, TLI: *TLI);
687 }
688
689 void getAnalysisUsage(AnalysisUsage &AU) const override {
690 AU.addRequired<TargetPassConfig>();
691 AU.addPreserved<AAResultsWrapperPass>();
692 AU.addPreserved<GlobalsAAWrapperPass>();
693 }
694};
695} // namespace
696
697PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) {
698 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
699 return runImpl(F, TLI: *STI->getTargetLowering()) ? PreservedAnalyses::none()
700 : PreservedAnalyses::all();
701}
702
703char ExpandFpLegacyPass::ID = 0;
704INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp",
705 "Expand certain fp instructions", false, false)
706INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false)
707
708FunctionPass *llvm::createExpandFpPass() { return new ExpandFpLegacyPass(); }
709