1 | //===- Float2Int.cpp - Demote floating point ops to work on integers ------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the Float2Int pass, which aims to demote floating |
10 | // point operations to work on integers, where that is losslessly possible. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Transforms/Scalar/Float2Int.h" |
15 | #include "llvm/ADT/APInt.h" |
16 | #include "llvm/ADT/APSInt.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/Analysis/GlobalsModRef.h" |
19 | #include "llvm/IR/Constants.h" |
20 | #include "llvm/IR/Dominators.h" |
21 | #include "llvm/IR/IRBuilder.h" |
22 | #include "llvm/IR/Module.h" |
23 | #include "llvm/Support/CommandLine.h" |
24 | #include "llvm/Support/Debug.h" |
25 | #include "llvm/Support/raw_ostream.h" |
26 | #include <deque> |
27 | |
28 | #define DEBUG_TYPE "float2int" |
29 | |
30 | using namespace llvm; |
31 | |
// The algorithm is simple. Start at instructions that convert from the
// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
// graph, using an equivalence data structure to unify graphs that interfere.
//
// Mappable instructions are those with an integer corollary that, given
// integer domain inputs, produce an integer output; fadd, for example.
//
// If a non-mappable instruction is seen, this entire def-use graph is marked
// as non-transformable. If we see an instruction that converts from the
// integer domain to FP domain (uitofp, sitofp), we terminate our walk.
42 | |
43 | /// The largest integer type worth dealing with. |
44 | static cl::opt<unsigned> |
45 | MaxIntegerBW("float2int-max-integer-bw" , cl::init(Val: 64), cl::Hidden, |
46 | cl::desc("Max integer bitwidth to consider in float2int" |
47 | "(default=64)" )); |
48 | |
49 | // Given a FCmp predicate, return a matching ICmp predicate if one |
50 | // exists, otherwise return BAD_ICMP_PREDICATE. |
51 | static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) { |
52 | switch (P) { |
53 | case CmpInst::FCMP_OEQ: |
54 | case CmpInst::FCMP_UEQ: |
55 | return CmpInst::ICMP_EQ; |
56 | case CmpInst::FCMP_OGT: |
57 | case CmpInst::FCMP_UGT: |
58 | return CmpInst::ICMP_SGT; |
59 | case CmpInst::FCMP_OGE: |
60 | case CmpInst::FCMP_UGE: |
61 | return CmpInst::ICMP_SGE; |
62 | case CmpInst::FCMP_OLT: |
63 | case CmpInst::FCMP_ULT: |
64 | return CmpInst::ICMP_SLT; |
65 | case CmpInst::FCMP_OLE: |
66 | case CmpInst::FCMP_ULE: |
67 | return CmpInst::ICMP_SLE; |
68 | case CmpInst::FCMP_ONE: |
69 | case CmpInst::FCMP_UNE: |
70 | return CmpInst::ICMP_NE; |
71 | default: |
72 | return CmpInst::BAD_ICMP_PREDICATE; |
73 | } |
74 | } |
75 | |
76 | // Given a floating point binary operator, return the matching |
77 | // integer version. |
78 | static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { |
79 | switch (Opcode) { |
80 | default: llvm_unreachable("Unhandled opcode!" ); |
81 | case Instruction::FAdd: return Instruction::Add; |
82 | case Instruction::FSub: return Instruction::Sub; |
83 | case Instruction::FMul: return Instruction::Mul; |
84 | } |
85 | } |
86 | |
87 | // Find the roots - instructions that convert from the FP domain to |
88 | // integer domain. |
89 | void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) { |
90 | for (BasicBlock &BB : F) { |
91 | // Unreachable code can take on strange forms that we are not prepared to |
92 | // handle. For example, an instruction may have itself as an operand. |
93 | if (!DT.isReachableFromEntry(A: &BB)) |
94 | continue; |
95 | |
96 | for (Instruction &I : BB) { |
97 | if (isa<VectorType>(Val: I.getType())) |
98 | continue; |
99 | switch (I.getOpcode()) { |
100 | default: break; |
101 | case Instruction::FPToUI: |
102 | case Instruction::FPToSI: |
103 | Roots.insert(X: &I); |
104 | break; |
105 | case Instruction::FCmp: |
106 | if (mapFCmpPred(P: cast<CmpInst>(Val: &I)->getPredicate()) != |
107 | CmpInst::BAD_ICMP_PREDICATE) |
108 | Roots.insert(X: &I); |
109 | break; |
110 | } |
111 | } |
112 | } |
113 | } |
114 | |
115 | // Helper - mark I as having been traversed, having range R. |
116 | void Float2IntPass::seen(Instruction *I, ConstantRange R) { |
117 | LLVM_DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n" ); |
118 | SeenInsts.insert_or_assign(Key: I, Val: std::move(R)); |
119 | } |
120 | |
121 | // Helper - get a range representing a poison value. |
122 | ConstantRange Float2IntPass::badRange() { |
123 | return ConstantRange::getFull(BitWidth: MaxIntegerBW + 1); |
124 | } |
125 | ConstantRange Float2IntPass::unknownRange() { |
126 | return ConstantRange::getEmpty(BitWidth: MaxIntegerBW + 1); |
127 | } |
128 | ConstantRange Float2IntPass::validateRange(ConstantRange R) { |
129 | if (R.getBitWidth() > MaxIntegerBW + 1) |
130 | return badRange(); |
131 | return R; |
132 | } |
133 | |
// The most obvious way to structure the search is a depth-first, eager
// search from each root. However, that requires direct recursion and so
// can only handle small instruction sequences. Instead, we split the search
// up into two phases:
//   - walkBackwards: A breadth-first walk of the use-def graph starting from
//                    the roots. Populate "SeenInsts" with interesting
//                    instructions and poison values if they're obvious and
//                    cheap to compute. Calculate the equivalence set structure
//                    while we're here too.
//   - walkForwards:  Iterate over SeenInsts in reverse order, so we visit
//                    defs before their uses. Calculate the real range info.
145 | |
146 | // Breadth-first walk of the use-def graph; determine the set of nodes |
147 | // we care about and eagerly determine if some of them are poisonous. |
148 | void Float2IntPass::walkBackwards() { |
149 | std::deque<Instruction*> Worklist(Roots.begin(), Roots.end()); |
150 | while (!Worklist.empty()) { |
151 | Instruction *I = Worklist.back(); |
152 | Worklist.pop_back(); |
153 | |
154 | if (SeenInsts.contains(Key: I)) |
155 | // Seen already. |
156 | continue; |
157 | |
158 | switch (I->getOpcode()) { |
159 | // FIXME: Handle select and phi nodes. |
160 | default: |
161 | // Path terminated uncleanly. |
162 | seen(I, R: badRange()); |
163 | break; |
164 | |
165 | case Instruction::UIToFP: |
166 | case Instruction::SIToFP: { |
167 | // Path terminated cleanly - use the type of the integer input to seed |
168 | // the analysis. |
169 | unsigned BW = I->getOperand(i: 0)->getType()->getPrimitiveSizeInBits(); |
170 | auto Input = ConstantRange::getFull(BitWidth: BW); |
171 | auto CastOp = (Instruction::CastOps)I->getOpcode(); |
172 | seen(I, R: validateRange(R: Input.castOp(CastOp, BitWidth: MaxIntegerBW+1))); |
173 | continue; |
174 | } |
175 | |
176 | case Instruction::FNeg: |
177 | case Instruction::FAdd: |
178 | case Instruction::FSub: |
179 | case Instruction::FMul: |
180 | case Instruction::FPToUI: |
181 | case Instruction::FPToSI: |
182 | case Instruction::FCmp: |
183 | seen(I, R: unknownRange()); |
184 | break; |
185 | } |
186 | |
187 | for (Value *O : I->operands()) { |
188 | if (Instruction *OI = dyn_cast<Instruction>(Val: O)) { |
189 | // Unify def-use chains if they interfere. |
190 | ECs.unionSets(V1: I, V2: OI); |
191 | if (SeenInsts.find(Key: I)->second != badRange()) |
192 | Worklist.push_back(x: OI); |
193 | } else if (!isa<ConstantFP>(Val: O)) { |
194 | // Not an instruction or ConstantFP? we can't do anything. |
195 | seen(I, R: badRange()); |
196 | } |
197 | } |
198 | } |
199 | } |
200 | |
201 | // Calculate result range from operand ranges. |
202 | // Return std::nullopt if the range cannot be calculated yet. |
203 | std::optional<ConstantRange> Float2IntPass::calcRange(Instruction *I) { |
204 | SmallVector<ConstantRange, 4> OpRanges; |
205 | for (Value *O : I->operands()) { |
206 | if (Instruction *OI = dyn_cast<Instruction>(Val: O)) { |
207 | auto OpIt = SeenInsts.find(Key: OI); |
208 | assert(OpIt != SeenInsts.end() && "def not seen before use!" ); |
209 | if (OpIt->second == unknownRange()) |
210 | return std::nullopt; // Wait until operand range has been calculated. |
211 | OpRanges.push_back(Elt: OpIt->second); |
212 | } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: O)) { |
213 | // Work out if the floating point number can be losslessly represented |
214 | // as an integer. |
215 | // APFloat::convertToInteger(&Exact) purports to do what we want, but |
216 | // the exactness can be too precise. For example, negative zero can |
217 | // never be exactly converted to an integer. |
218 | // |
219 | // Instead, we ask APFloat to round itself to an integral value - this |
220 | // preserves sign-of-zero - then compare the result with the original. |
221 | // |
222 | const APFloat &F = CF->getValueAPF(); |
223 | |
224 | // First, weed out obviously incorrect values. Non-finite numbers |
225 | // can't be represented and neither can negative zero, unless |
226 | // we're in fast math mode. |
227 | if (!F.isFinite() || |
228 | (F.isZero() && F.isNegative() && isa<FPMathOperator>(Val: I) && |
229 | !I->hasNoSignedZeros())) |
230 | return badRange(); |
231 | |
232 | APFloat NewF = F; |
233 | auto Res = NewF.roundToIntegral(RM: APFloat::rmNearestTiesToEven); |
234 | if (Res != APFloat::opOK || NewF != F) |
235 | return badRange(); |
236 | |
237 | // OK, it's representable. Now get it. |
238 | APSInt Int(MaxIntegerBW+1, false); |
239 | bool Exact; |
240 | CF->getValueAPF().convertToInteger(Result&: Int, |
241 | RM: APFloat::rmNearestTiesToEven, |
242 | IsExact: &Exact); |
243 | OpRanges.push_back(Elt: ConstantRange(Int)); |
244 | } else { |
245 | llvm_unreachable("Should have already marked this as badRange!" ); |
246 | } |
247 | } |
248 | |
249 | switch (I->getOpcode()) { |
250 | // FIXME: Handle select and phi nodes. |
251 | default: |
252 | case Instruction::UIToFP: |
253 | case Instruction::SIToFP: |
254 | llvm_unreachable("Should have been handled in walkForwards!" ); |
255 | |
256 | case Instruction::FNeg: { |
257 | assert(OpRanges.size() == 1 && "FNeg is a unary operator!" ); |
258 | unsigned Size = OpRanges[0].getBitWidth(); |
259 | auto Zero = ConstantRange(APInt::getZero(numBits: Size)); |
260 | return Zero.sub(Other: OpRanges[0]); |
261 | } |
262 | |
263 | case Instruction::FAdd: |
264 | case Instruction::FSub: |
265 | case Instruction::FMul: { |
266 | assert(OpRanges.size() == 2 && "its a binary operator!" ); |
267 | auto BinOp = (Instruction::BinaryOps) I->getOpcode(); |
268 | return OpRanges[0].binaryOp(BinOp, Other: OpRanges[1]); |
269 | } |
270 | |
271 | // |
272 | // Root-only instructions - we'll only see these if they're the |
273 | // first node in a walk. |
274 | // |
275 | case Instruction::FPToUI: |
276 | case Instruction::FPToSI: { |
277 | assert(OpRanges.size() == 1 && "FPTo[US]I is a unary operator!" ); |
278 | // Note: We're ignoring the casts output size here as that's what the |
279 | // caller expects. |
280 | auto CastOp = (Instruction::CastOps)I->getOpcode(); |
281 | return OpRanges[0].castOp(CastOp, BitWidth: MaxIntegerBW+1); |
282 | } |
283 | |
284 | case Instruction::FCmp: |
285 | assert(OpRanges.size() == 2 && "FCmp is a binary operator!" ); |
286 | return OpRanges[0].unionWith(CR: OpRanges[1]); |
287 | } |
288 | } |
289 | |
290 | // Walk forwards down the list of seen instructions, so we visit defs before |
291 | // uses. |
292 | void Float2IntPass::walkForwards() { |
293 | std::deque<Instruction *> Worklist; |
294 | for (const auto &Pair : SeenInsts) |
295 | if (Pair.second == unknownRange()) |
296 | Worklist.push_back(x: Pair.first); |
297 | |
298 | while (!Worklist.empty()) { |
299 | Instruction *I = Worklist.back(); |
300 | Worklist.pop_back(); |
301 | |
302 | if (std::optional<ConstantRange> Range = calcRange(I)) |
303 | seen(I, R: *Range); |
304 | else |
305 | Worklist.push_front(x: I); // Reprocess later. |
306 | } |
307 | } |
308 | |
309 | // If there is a valid transform to be done, do it. |
310 | bool Float2IntPass::validateAndTransform(const DataLayout &DL) { |
311 | bool MadeChange = false; |
312 | |
313 | // Iterate over every disjoint partition of the def-use graph. |
314 | for (const auto &E : ECs) { |
315 | if (!E->isLeader()) |
316 | continue; |
317 | |
318 | ConstantRange R(MaxIntegerBW + 1, false); |
319 | bool Fail = false; |
320 | Type *ConvertedToTy = nullptr; |
321 | |
322 | // For every member of the partition, union all the ranges together. |
323 | for (Instruction *I : ECs.members(ECV: *E)) { |
324 | auto *SeenI = SeenInsts.find(Key: I); |
325 | if (SeenI == SeenInsts.end()) |
326 | continue; |
327 | |
328 | R = R.unionWith(CR: SeenI->second); |
329 | // We need to ensure I has no users that have not been seen. |
330 | // If it does, transformation would be illegal. |
331 | // |
332 | // Don't count the roots, as they terminate the graphs. |
333 | if (!Roots.contains(key: I)) { |
334 | // Set the type of the conversion while we're here. |
335 | if (!ConvertedToTy) |
336 | ConvertedToTy = I->getType(); |
337 | for (User *U : I->users()) { |
338 | Instruction *UI = dyn_cast<Instruction>(Val: U); |
339 | if (!UI || !SeenInsts.contains(Key: UI)) { |
340 | LLVM_DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n" ); |
341 | Fail = true; |
342 | break; |
343 | } |
344 | } |
345 | } |
346 | if (Fail) |
347 | break; |
348 | } |
349 | |
350 | // If the set was empty, or we failed, or the range is poisonous, |
351 | // bail out. |
352 | if (ECs.member_begin(ECV: *E) == ECs.member_end() || Fail || R.isFullSet() || |
353 | R.isSignWrappedSet()) |
354 | continue; |
355 | assert(ConvertedToTy && "Must have set the convertedtoty by this point!" ); |
356 | |
357 | // The number of bits required is the maximum of the upper and |
358 | // lower limits, plus one so it can be signed. |
359 | unsigned MinBW = R.getMinSignedBits() + 1; |
360 | LLVM_DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n" ); |
361 | |
362 | // If we've run off the realms of the exactly representable integers, |
363 | // the floating point result will differ from an integer approximation. |
364 | |
365 | // Do we need more bits than are in the mantissa of the type we converted |
366 | // to? semanticsPrecision returns the number of mantissa bits plus one |
367 | // for the sign bit. |
368 | unsigned MaxRepresentableBits |
369 | = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1; |
370 | if (MinBW > MaxRepresentableBits) { |
371 | LLVM_DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n" ); |
372 | continue; |
373 | } |
374 | |
375 | // OK, R is known to be representable. |
376 | // Pick the smallest legal type that will fit. |
377 | Type *Ty = DL.getSmallestLegalIntType(C&: *Ctx, Width: MinBW); |
378 | if (!Ty) { |
379 | // Every supported target supports 64-bit and 32-bit integers, |
380 | // so fallback to a 32 or 64-bit integer if the value fits. |
381 | if (MinBW <= 32) { |
382 | Ty = Type::getInt32Ty(C&: *Ctx); |
383 | } else if (MinBW <= 64) { |
384 | Ty = Type::getInt64Ty(C&: *Ctx); |
385 | } else { |
386 | LLVM_DEBUG(dbgs() << "F2I: Value requires more bits to represent than " |
387 | "the target supports!\n" ); |
388 | continue; |
389 | } |
390 | } |
391 | |
392 | for (Instruction *I : ECs.members(ECV: *E)) |
393 | convert(I, ToTy: Ty); |
394 | MadeChange = true; |
395 | } |
396 | |
397 | return MadeChange; |
398 | } |
399 | |
400 | Value *Float2IntPass::convert(Instruction *I, Type *ToTy) { |
401 | if (auto It = ConvertedInsts.find(Key: I); It != ConvertedInsts.end()) |
402 | // Already converted this instruction. |
403 | return It->second; |
404 | |
405 | SmallVector<Value*,4> NewOperands; |
406 | for (Value *V : I->operands()) { |
407 | // Don't recurse if we're an instruction that terminates the path. |
408 | if (I->getOpcode() == Instruction::UIToFP || |
409 | I->getOpcode() == Instruction::SIToFP) { |
410 | NewOperands.push_back(Elt: V); |
411 | } else if (Instruction *VI = dyn_cast<Instruction>(Val: V)) { |
412 | NewOperands.push_back(Elt: convert(I: VI, ToTy)); |
413 | } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: V)) { |
414 | APSInt Val(ToTy->getPrimitiveSizeInBits(), /*isUnsigned=*/false); |
415 | bool Exact; |
416 | CF->getValueAPF().convertToInteger(Result&: Val, |
417 | RM: APFloat::rmNearestTiesToEven, |
418 | IsExact: &Exact); |
419 | NewOperands.push_back(Elt: ConstantInt::get(Ty: ToTy, V: Val)); |
420 | } else { |
421 | llvm_unreachable("Unhandled operand type?" ); |
422 | } |
423 | } |
424 | |
425 | // Now create a new instruction. |
426 | IRBuilder<> IRB(I); |
427 | Value *NewV = nullptr; |
428 | switch (I->getOpcode()) { |
429 | default: llvm_unreachable("Unhandled instruction!" ); |
430 | |
431 | case Instruction::FPToUI: |
432 | NewV = IRB.CreateZExtOrTrunc(V: NewOperands[0], DestTy: I->getType()); |
433 | break; |
434 | |
435 | case Instruction::FPToSI: |
436 | NewV = IRB.CreateSExtOrTrunc(V: NewOperands[0], DestTy: I->getType()); |
437 | break; |
438 | |
439 | case Instruction::FCmp: { |
440 | CmpInst::Predicate P = mapFCmpPred(P: cast<CmpInst>(Val: I)->getPredicate()); |
441 | assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!" ); |
442 | NewV = IRB.CreateICmp(P, LHS: NewOperands[0], RHS: NewOperands[1], Name: I->getName()); |
443 | break; |
444 | } |
445 | |
446 | case Instruction::UIToFP: |
447 | NewV = IRB.CreateZExtOrTrunc(V: NewOperands[0], DestTy: ToTy); |
448 | break; |
449 | |
450 | case Instruction::SIToFP: |
451 | NewV = IRB.CreateSExtOrTrunc(V: NewOperands[0], DestTy: ToTy); |
452 | break; |
453 | |
454 | case Instruction::FNeg: |
455 | NewV = IRB.CreateNeg(V: NewOperands[0], Name: I->getName()); |
456 | break; |
457 | |
458 | case Instruction::FAdd: |
459 | case Instruction::FSub: |
460 | case Instruction::FMul: |
461 | NewV = IRB.CreateBinOp(Opc: mapBinOpcode(Opcode: I->getOpcode()), |
462 | LHS: NewOperands[0], RHS: NewOperands[1], |
463 | Name: I->getName()); |
464 | break; |
465 | } |
466 | |
467 | // If we're a root instruction, RAUW. |
468 | if (Roots.count(key: I)) |
469 | I->replaceAllUsesWith(V: NewV); |
470 | |
471 | ConvertedInsts[I] = NewV; |
472 | return NewV; |
473 | } |
474 | |
475 | // Perform dead code elimination on the instructions we just modified. |
476 | void Float2IntPass::cleanup() { |
477 | for (auto &I : reverse(C&: ConvertedInsts)) |
478 | I.first->eraseFromParent(); |
479 | } |
480 | |
481 | bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) { |
482 | LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n" ); |
483 | // Clear out all state. |
484 | ECs = EquivalenceClasses<Instruction*>(); |
485 | SeenInsts.clear(); |
486 | ConvertedInsts.clear(); |
487 | Roots.clear(); |
488 | |
489 | Ctx = &F.getParent()->getContext(); |
490 | |
491 | findRoots(F, DT); |
492 | |
493 | walkBackwards(); |
494 | walkForwards(); |
495 | |
496 | const DataLayout &DL = F.getDataLayout(); |
497 | bool Modified = validateAndTransform(DL); |
498 | if (Modified) |
499 | cleanup(); |
500 | return Modified; |
501 | } |
502 | |
503 | PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) { |
504 | const DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
505 | if (!runImpl(F, DT)) |
506 | return PreservedAnalyses::all(); |
507 | |
508 | PreservedAnalyses PA; |
509 | PA.preserveSet<CFGAnalyses>(); |
510 | return PA; |
511 | } |
512 | |