1 | //===- Float2Int.cpp - Demote floating point ops to work on integers ------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the Float2Int pass, which aims to demote floating |
10 | // point operations to work on integers, where that is losslessly possible. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Transforms/Scalar/Float2Int.h" |
15 | #include "llvm/ADT/APInt.h" |
16 | #include "llvm/ADT/APSInt.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/Analysis/GlobalsModRef.h" |
19 | #include "llvm/IR/Constants.h" |
20 | #include "llvm/IR/Dominators.h" |
21 | #include "llvm/IR/IRBuilder.h" |
22 | #include "llvm/IR/Module.h" |
23 | #include "llvm/Support/CommandLine.h" |
24 | #include "llvm/Support/Debug.h" |
25 | #include "llvm/Support/raw_ostream.h" |
26 | #include <deque> |
27 | |
28 | #define DEBUG_TYPE "float2int" |
29 | |
30 | using namespace llvm; |
31 | |
32 | // The algorithm is simple. Start at instructions that convert from the |
33 | // float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use |
34 | // graph, using an equivalence datastructure to unify graphs that interfere. |
35 | // |
// Mappable instructions are those with an integer corollary that, given
// integer domain inputs, produce an integer output; fadd, for example.
38 | // |
39 | // If a non-mappable instruction is seen, this entire def-use graph is marked |
40 | // as non-transformable. If we see an instruction that converts from the |
41 | // integer domain to FP domain (uitofp,sitofp), we terminate our walk. |
42 | |
43 | /// The largest integer type worth dealing with. |
44 | static cl::opt<unsigned> |
45 | MaxIntegerBW("float2int-max-integer-bw" , cl::init(Val: 64), cl::Hidden, |
46 | cl::desc("Max integer bitwidth to consider in float2int" |
47 | "(default=64)" )); |
48 | |
49 | // Given a FCmp predicate, return a matching ICmp predicate if one |
50 | // exists, otherwise return BAD_ICMP_PREDICATE. |
51 | static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) { |
52 | switch (P) { |
53 | case CmpInst::FCMP_OEQ: |
54 | case CmpInst::FCMP_UEQ: |
55 | return CmpInst::ICMP_EQ; |
56 | case CmpInst::FCMP_OGT: |
57 | case CmpInst::FCMP_UGT: |
58 | return CmpInst::ICMP_SGT; |
59 | case CmpInst::FCMP_OGE: |
60 | case CmpInst::FCMP_UGE: |
61 | return CmpInst::ICMP_SGE; |
62 | case CmpInst::FCMP_OLT: |
63 | case CmpInst::FCMP_ULT: |
64 | return CmpInst::ICMP_SLT; |
65 | case CmpInst::FCMP_OLE: |
66 | case CmpInst::FCMP_ULE: |
67 | return CmpInst::ICMP_SLE; |
68 | case CmpInst::FCMP_ONE: |
69 | case CmpInst::FCMP_UNE: |
70 | return CmpInst::ICMP_NE; |
71 | default: |
72 | return CmpInst::BAD_ICMP_PREDICATE; |
73 | } |
74 | } |
75 | |
76 | // Given a floating point binary operator, return the matching |
77 | // integer version. |
78 | static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { |
79 | switch (Opcode) { |
80 | default: llvm_unreachable("Unhandled opcode!" ); |
81 | case Instruction::FAdd: return Instruction::Add; |
82 | case Instruction::FSub: return Instruction::Sub; |
83 | case Instruction::FMul: return Instruction::Mul; |
84 | } |
85 | } |
86 | |
87 | // Find the roots - instructions that convert from the FP domain to |
88 | // integer domain. |
89 | void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) { |
90 | for (BasicBlock &BB : F) { |
91 | // Unreachable code can take on strange forms that we are not prepared to |
92 | // handle. For example, an instruction may have itself as an operand. |
93 | if (!DT.isReachableFromEntry(A: &BB)) |
94 | continue; |
95 | |
96 | for (Instruction &I : BB) { |
97 | if (isa<VectorType>(Val: I.getType())) |
98 | continue; |
99 | switch (I.getOpcode()) { |
100 | default: break; |
101 | case Instruction::FPToUI: |
102 | case Instruction::FPToSI: |
103 | Roots.insert(X: &I); |
104 | break; |
105 | case Instruction::FCmp: |
106 | if (mapFCmpPred(P: cast<CmpInst>(Val: &I)->getPredicate()) != |
107 | CmpInst::BAD_ICMP_PREDICATE) |
108 | Roots.insert(X: &I); |
109 | break; |
110 | } |
111 | } |
112 | } |
113 | } |
114 | |
115 | // Helper - mark I as having been traversed, having range R. |
116 | void Float2IntPass::seen(Instruction *I, ConstantRange R) { |
117 | LLVM_DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n" ); |
118 | auto IT = SeenInsts.find(Key: I); |
119 | if (IT != SeenInsts.end()) |
120 | IT->second = std::move(R); |
121 | else |
122 | SeenInsts.insert(KV: std::make_pair(x&: I, y: std::move(R))); |
123 | } |
124 | |
125 | // Helper - get a range representing a poison value. |
126 | ConstantRange Float2IntPass::badRange() { |
127 | return ConstantRange::getFull(BitWidth: MaxIntegerBW + 1); |
128 | } |
129 | ConstantRange Float2IntPass::unknownRange() { |
130 | return ConstantRange::getEmpty(BitWidth: MaxIntegerBW + 1); |
131 | } |
132 | ConstantRange Float2IntPass::validateRange(ConstantRange R) { |
133 | if (R.getBitWidth() > MaxIntegerBW + 1) |
134 | return badRange(); |
135 | return R; |
136 | } |
137 | |
// The most obvious way to structure the search is a depth-first, eager
// search from each root. However, that requires direct recursion and so
// can only handle small instruction sequences. Instead, we split the search
// up into two phases:
//   - walkBackwards:  A breadth-first walk of the use-def graph starting from
//                     the roots. Populate "SeenInsts" with interesting
//                     instructions and poison values if they're obvious and
//                     cheap to compute. Calculate the equivalence set structure
//                     while we're here too.
//   - walkForwards:  Iterate over SeenInsts in reverse order, so we visit
//                     defs before their uses. Calculate the real range info.
149 | |
150 | // Breadth-first walk of the use-def graph; determine the set of nodes |
151 | // we care about and eagerly determine if some of them are poisonous. |
152 | void Float2IntPass::walkBackwards() { |
153 | std::deque<Instruction*> Worklist(Roots.begin(), Roots.end()); |
154 | while (!Worklist.empty()) { |
155 | Instruction *I = Worklist.back(); |
156 | Worklist.pop_back(); |
157 | |
158 | if (SeenInsts.contains(Key: I)) |
159 | // Seen already. |
160 | continue; |
161 | |
162 | switch (I->getOpcode()) { |
163 | // FIXME: Handle select and phi nodes. |
164 | default: |
165 | // Path terminated uncleanly. |
166 | seen(I, R: badRange()); |
167 | break; |
168 | |
169 | case Instruction::UIToFP: |
170 | case Instruction::SIToFP: { |
171 | // Path terminated cleanly - use the type of the integer input to seed |
172 | // the analysis. |
173 | unsigned BW = I->getOperand(i: 0)->getType()->getPrimitiveSizeInBits(); |
174 | auto Input = ConstantRange::getFull(BitWidth: BW); |
175 | auto CastOp = (Instruction::CastOps)I->getOpcode(); |
176 | seen(I, R: validateRange(R: Input.castOp(CastOp, BitWidth: MaxIntegerBW+1))); |
177 | continue; |
178 | } |
179 | |
180 | case Instruction::FNeg: |
181 | case Instruction::FAdd: |
182 | case Instruction::FSub: |
183 | case Instruction::FMul: |
184 | case Instruction::FPToUI: |
185 | case Instruction::FPToSI: |
186 | case Instruction::FCmp: |
187 | seen(I, R: unknownRange()); |
188 | break; |
189 | } |
190 | |
191 | for (Value *O : I->operands()) { |
192 | if (Instruction *OI = dyn_cast<Instruction>(Val: O)) { |
193 | // Unify def-use chains if they interfere. |
194 | ECs.unionSets(V1: I, V2: OI); |
195 | if (SeenInsts.find(Key: I)->second != badRange()) |
196 | Worklist.push_back(x: OI); |
197 | } else if (!isa<ConstantFP>(Val: O)) { |
198 | // Not an instruction or ConstantFP? we can't do anything. |
199 | seen(I, R: badRange()); |
200 | } |
201 | } |
202 | } |
203 | } |
204 | |
205 | // Calculate result range from operand ranges. |
206 | // Return std::nullopt if the range cannot be calculated yet. |
207 | std::optional<ConstantRange> Float2IntPass::calcRange(Instruction *I) { |
208 | SmallVector<ConstantRange, 4> OpRanges; |
209 | for (Value *O : I->operands()) { |
210 | if (Instruction *OI = dyn_cast<Instruction>(Val: O)) { |
211 | auto OpIt = SeenInsts.find(Key: OI); |
212 | assert(OpIt != SeenInsts.end() && "def not seen before use!" ); |
213 | if (OpIt->second == unknownRange()) |
214 | return std::nullopt; // Wait until operand range has been calculated. |
215 | OpRanges.push_back(Elt: OpIt->second); |
216 | } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: O)) { |
217 | // Work out if the floating point number can be losslessly represented |
218 | // as an integer. |
219 | // APFloat::convertToInteger(&Exact) purports to do what we want, but |
220 | // the exactness can be too precise. For example, negative zero can |
221 | // never be exactly converted to an integer. |
222 | // |
223 | // Instead, we ask APFloat to round itself to an integral value - this |
224 | // preserves sign-of-zero - then compare the result with the original. |
225 | // |
226 | const APFloat &F = CF->getValueAPF(); |
227 | |
228 | // First, weed out obviously incorrect values. Non-finite numbers |
229 | // can't be represented and neither can negative zero, unless |
230 | // we're in fast math mode. |
231 | if (!F.isFinite() || |
232 | (F.isZero() && F.isNegative() && isa<FPMathOperator>(Val: I) && |
233 | !I->hasNoSignedZeros())) |
234 | return badRange(); |
235 | |
236 | APFloat NewF = F; |
237 | auto Res = NewF.roundToIntegral(RM: APFloat::rmNearestTiesToEven); |
238 | if (Res != APFloat::opOK || NewF != F) |
239 | return badRange(); |
240 | |
241 | // OK, it's representable. Now get it. |
242 | APSInt Int(MaxIntegerBW+1, false); |
243 | bool Exact; |
244 | CF->getValueAPF().convertToInteger(Result&: Int, |
245 | RM: APFloat::rmNearestTiesToEven, |
246 | IsExact: &Exact); |
247 | OpRanges.push_back(Elt: ConstantRange(Int)); |
248 | } else { |
249 | llvm_unreachable("Should have already marked this as badRange!" ); |
250 | } |
251 | } |
252 | |
253 | switch (I->getOpcode()) { |
254 | // FIXME: Handle select and phi nodes. |
255 | default: |
256 | case Instruction::UIToFP: |
257 | case Instruction::SIToFP: |
258 | llvm_unreachable("Should have been handled in walkForwards!" ); |
259 | |
260 | case Instruction::FNeg: { |
261 | assert(OpRanges.size() == 1 && "FNeg is a unary operator!" ); |
262 | unsigned Size = OpRanges[0].getBitWidth(); |
263 | auto Zero = ConstantRange(APInt::getZero(numBits: Size)); |
264 | return Zero.sub(Other: OpRanges[0]); |
265 | } |
266 | |
267 | case Instruction::FAdd: |
268 | case Instruction::FSub: |
269 | case Instruction::FMul: { |
270 | assert(OpRanges.size() == 2 && "its a binary operator!" ); |
271 | auto BinOp = (Instruction::BinaryOps) I->getOpcode(); |
272 | return OpRanges[0].binaryOp(BinOp, Other: OpRanges[1]); |
273 | } |
274 | |
275 | // |
276 | // Root-only instructions - we'll only see these if they're the |
277 | // first node in a walk. |
278 | // |
279 | case Instruction::FPToUI: |
280 | case Instruction::FPToSI: { |
281 | assert(OpRanges.size() == 1 && "FPTo[US]I is a unary operator!" ); |
282 | // Note: We're ignoring the casts output size here as that's what the |
283 | // caller expects. |
284 | auto CastOp = (Instruction::CastOps)I->getOpcode(); |
285 | return OpRanges[0].castOp(CastOp, BitWidth: MaxIntegerBW+1); |
286 | } |
287 | |
288 | case Instruction::FCmp: |
289 | assert(OpRanges.size() == 2 && "FCmp is a binary operator!" ); |
290 | return OpRanges[0].unionWith(CR: OpRanges[1]); |
291 | } |
292 | } |
293 | |
294 | // Walk forwards down the list of seen instructions, so we visit defs before |
295 | // uses. |
296 | void Float2IntPass::walkForwards() { |
297 | std::deque<Instruction *> Worklist; |
298 | for (const auto &Pair : SeenInsts) |
299 | if (Pair.second == unknownRange()) |
300 | Worklist.push_back(x: Pair.first); |
301 | |
302 | while (!Worklist.empty()) { |
303 | Instruction *I = Worklist.back(); |
304 | Worklist.pop_back(); |
305 | |
306 | if (std::optional<ConstantRange> Range = calcRange(I)) |
307 | seen(I, R: *Range); |
308 | else |
309 | Worklist.push_front(x: I); // Reprocess later. |
310 | } |
311 | } |
312 | |
313 | // If there is a valid transform to be done, do it. |
314 | bool Float2IntPass::validateAndTransform(const DataLayout &DL) { |
315 | bool MadeChange = false; |
316 | |
317 | // Iterate over every disjoint partition of the def-use graph. |
318 | for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) { |
319 | ConstantRange R(MaxIntegerBW + 1, false); |
320 | bool Fail = false; |
321 | Type *ConvertedToTy = nullptr; |
322 | |
323 | // For every member of the partition, union all the ranges together. |
324 | for (auto MI = ECs.member_begin(I: It), ME = ECs.member_end(); |
325 | MI != ME; ++MI) { |
326 | Instruction *I = *MI; |
327 | auto SeenI = SeenInsts.find(Key: I); |
328 | if (SeenI == SeenInsts.end()) |
329 | continue; |
330 | |
331 | R = R.unionWith(CR: SeenI->second); |
332 | // We need to ensure I has no users that have not been seen. |
333 | // If it does, transformation would be illegal. |
334 | // |
335 | // Don't count the roots, as they terminate the graphs. |
336 | if (!Roots.contains(key: I)) { |
337 | // Set the type of the conversion while we're here. |
338 | if (!ConvertedToTy) |
339 | ConvertedToTy = I->getType(); |
340 | for (User *U : I->users()) { |
341 | Instruction *UI = dyn_cast<Instruction>(Val: U); |
342 | if (!UI || !SeenInsts.contains(Key: UI)) { |
343 | LLVM_DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n" ); |
344 | Fail = true; |
345 | break; |
346 | } |
347 | } |
348 | } |
349 | if (Fail) |
350 | break; |
351 | } |
352 | |
353 | // If the set was empty, or we failed, or the range is poisonous, |
354 | // bail out. |
355 | if (ECs.member_begin(I: It) == ECs.member_end() || Fail || |
356 | R.isFullSet() || R.isSignWrappedSet()) |
357 | continue; |
358 | assert(ConvertedToTy && "Must have set the convertedtoty by this point!" ); |
359 | |
360 | // The number of bits required is the maximum of the upper and |
361 | // lower limits, plus one so it can be signed. |
362 | unsigned MinBW = R.getMinSignedBits() + 1; |
363 | LLVM_DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n" ); |
364 | |
365 | // If we've run off the realms of the exactly representable integers, |
366 | // the floating point result will differ from an integer approximation. |
367 | |
368 | // Do we need more bits than are in the mantissa of the type we converted |
369 | // to? semanticsPrecision returns the number of mantissa bits plus one |
370 | // for the sign bit. |
371 | unsigned MaxRepresentableBits |
372 | = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1; |
373 | if (MinBW > MaxRepresentableBits) { |
374 | LLVM_DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n" ); |
375 | continue; |
376 | } |
377 | |
378 | // OK, R is known to be representable. |
379 | // Pick the smallest legal type that will fit. |
380 | Type *Ty = DL.getSmallestLegalIntType(C&: *Ctx, Width: MinBW); |
381 | if (!Ty) { |
382 | // Every supported target supports 64-bit and 32-bit integers, |
383 | // so fallback to a 32 or 64-bit integer if the value fits. |
384 | if (MinBW <= 32) { |
385 | Ty = Type::getInt32Ty(C&: *Ctx); |
386 | } else if (MinBW <= 64) { |
387 | Ty = Type::getInt64Ty(C&: *Ctx); |
388 | } else { |
389 | LLVM_DEBUG(dbgs() << "F2I: Value requires more bits to represent than " |
390 | "the target supports!\n" ); |
391 | continue; |
392 | } |
393 | } |
394 | |
395 | for (auto MI = ECs.member_begin(I: It), ME = ECs.member_end(); |
396 | MI != ME; ++MI) |
397 | convert(I: *MI, ToTy: Ty); |
398 | MadeChange = true; |
399 | } |
400 | |
401 | return MadeChange; |
402 | } |
403 | |
404 | Value *Float2IntPass::convert(Instruction *I, Type *ToTy) { |
405 | if (ConvertedInsts.contains(Key: I)) |
406 | // Already converted this instruction. |
407 | return ConvertedInsts[I]; |
408 | |
409 | SmallVector<Value*,4> NewOperands; |
410 | for (Value *V : I->operands()) { |
411 | // Don't recurse if we're an instruction that terminates the path. |
412 | if (I->getOpcode() == Instruction::UIToFP || |
413 | I->getOpcode() == Instruction::SIToFP) { |
414 | NewOperands.push_back(Elt: V); |
415 | } else if (Instruction *VI = dyn_cast<Instruction>(Val: V)) { |
416 | NewOperands.push_back(Elt: convert(I: VI, ToTy)); |
417 | } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: V)) { |
418 | APSInt Val(ToTy->getPrimitiveSizeInBits(), /*isUnsigned=*/false); |
419 | bool Exact; |
420 | CF->getValueAPF().convertToInteger(Result&: Val, |
421 | RM: APFloat::rmNearestTiesToEven, |
422 | IsExact: &Exact); |
423 | NewOperands.push_back(Elt: ConstantInt::get(Ty: ToTy, V: Val)); |
424 | } else { |
425 | llvm_unreachable("Unhandled operand type?" ); |
426 | } |
427 | } |
428 | |
429 | // Now create a new instruction. |
430 | IRBuilder<> IRB(I); |
431 | Value *NewV = nullptr; |
432 | switch (I->getOpcode()) { |
433 | default: llvm_unreachable("Unhandled instruction!" ); |
434 | |
435 | case Instruction::FPToUI: |
436 | NewV = IRB.CreateZExtOrTrunc(V: NewOperands[0], DestTy: I->getType()); |
437 | break; |
438 | |
439 | case Instruction::FPToSI: |
440 | NewV = IRB.CreateSExtOrTrunc(V: NewOperands[0], DestTy: I->getType()); |
441 | break; |
442 | |
443 | case Instruction::FCmp: { |
444 | CmpInst::Predicate P = mapFCmpPred(P: cast<CmpInst>(Val: I)->getPredicate()); |
445 | assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!" ); |
446 | NewV = IRB.CreateICmp(P, LHS: NewOperands[0], RHS: NewOperands[1], Name: I->getName()); |
447 | break; |
448 | } |
449 | |
450 | case Instruction::UIToFP: |
451 | NewV = IRB.CreateZExtOrTrunc(V: NewOperands[0], DestTy: ToTy); |
452 | break; |
453 | |
454 | case Instruction::SIToFP: |
455 | NewV = IRB.CreateSExtOrTrunc(V: NewOperands[0], DestTy: ToTy); |
456 | break; |
457 | |
458 | case Instruction::FNeg: |
459 | NewV = IRB.CreateNeg(V: NewOperands[0], Name: I->getName()); |
460 | break; |
461 | |
462 | case Instruction::FAdd: |
463 | case Instruction::FSub: |
464 | case Instruction::FMul: |
465 | NewV = IRB.CreateBinOp(Opc: mapBinOpcode(Opcode: I->getOpcode()), |
466 | LHS: NewOperands[0], RHS: NewOperands[1], |
467 | Name: I->getName()); |
468 | break; |
469 | } |
470 | |
471 | // If we're a root instruction, RAUW. |
472 | if (Roots.count(key: I)) |
473 | I->replaceAllUsesWith(V: NewV); |
474 | |
475 | ConvertedInsts[I] = NewV; |
476 | return NewV; |
477 | } |
478 | |
479 | // Perform dead code elimination on the instructions we just modified. |
480 | void Float2IntPass::cleanup() { |
481 | for (auto &I : reverse(C&: ConvertedInsts)) |
482 | I.first->eraseFromParent(); |
483 | } |
484 | |
485 | bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) { |
486 | LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n" ); |
487 | // Clear out all state. |
488 | ECs = EquivalenceClasses<Instruction*>(); |
489 | SeenInsts.clear(); |
490 | ConvertedInsts.clear(); |
491 | Roots.clear(); |
492 | |
493 | Ctx = &F.getParent()->getContext(); |
494 | |
495 | findRoots(F, DT); |
496 | |
497 | walkBackwards(); |
498 | walkForwards(); |
499 | |
500 | const DataLayout &DL = F.getDataLayout(); |
501 | bool Modified = validateAndTransform(DL); |
502 | if (Modified) |
503 | cleanup(); |
504 | return Modified; |
505 | } |
506 | |
507 | PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) { |
508 | const DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
509 | if (!runImpl(F, DT)) |
510 | return PreservedAnalyses::all(); |
511 | |
512 | PreservedAnalyses PA; |
513 | PA.preserveSet<CFGAnalyses>(); |
514 | return PA; |
515 | } |
516 | |