Float2Int.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/Float2Int.cpp]

1	//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the Float2Int pass, which aims to demote floating
10	// point operations to work on integers, where that is losslessly possible.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "llvm/Transforms/Scalar/Float2Int.h"
15	#include "llvm/ADT/APInt.h"
16	#include "llvm/ADT/APSInt.h"
17	#include "llvm/ADT/SmallVector.h"
18	#include "llvm/Analysis/GlobalsModRef.h"
19	#include "llvm/IR/Constants.h"
20	#include "llvm/IR/Dominators.h"
21	#include "llvm/IR/IRBuilder.h"
22	#include "llvm/IR/Module.h"
23	#include "llvm/Support/CommandLine.h"
24	#include "llvm/Support/Debug.h"
25	#include "llvm/Support/raw_ostream.h"
26	#include <deque>
27
28	#define DEBUG_TYPE "float2int"
29
30	using namespace llvm;
31
32	// The algorithm is simple. Start at instructions that convert from the
33	// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
34	// graph, using an equivalence datastructure to unify graphs that interfere.
35	//
36	// Mappable instructions are those with an integer corollary that, given
37	// integer domain inputs, produce an integer output; fadd, for example.
38	//
39	// If a non-mappable instruction is seen, this entire def-use graph is marked
40	// as non-transformable. If we see an instruction that converts from the
41	// integer domain to FP domain (uitofp,sitofp), we terminate our walk.
42
43	/// The largest integer type worth dealing with.
44	static cl::opt<unsigned>
45	MaxIntegerBW("float2int-max-integer-bw", cl::init(Val: `64`), cl::Hidden,
46	cl::desc ("Max integer bitwidth to consider in float2int"
47	"(default=64)"));
48
49	// Given a FCmp predicate, return a matching ICmp predicate if one
50	// exists, otherwise return BAD_ICMP_PREDICATE.
51	static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
52	switch (P) {
53	case CmpInst::FCMP_OEQ:
54	case CmpInst::FCMP_UEQ:
55	return CmpInst::ICMP_EQ;
56	case CmpInst::FCMP_OGT:
57	case CmpInst::FCMP_UGT:
58	return CmpInst::ICMP_SGT;
59	case CmpInst::FCMP_OGE:
60	case CmpInst::FCMP_UGE:
61	return CmpInst::ICMP_SGE;
62	case CmpInst::FCMP_OLT:
63	case CmpInst::FCMP_ULT:
64	return CmpInst::ICMP_SLT;
65	case CmpInst::FCMP_OLE:
66	case CmpInst::FCMP_ULE:
67	return CmpInst::ICMP_SLE;
68	case CmpInst::FCMP_ONE:
69	case CmpInst::FCMP_UNE:
70	return CmpInst::ICMP_NE;
71	default:
72	return CmpInst::BAD_ICMP_PREDICATE;
73	}
74	}
75
76	// Given a floating point binary operator, return the matching
77	// integer version.
78	static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
79	switch (Opcode) {
80	default: llvm_unreachable("Unhandled opcode!");
81	case Instruction::FAdd: return Instruction::Add;
82	case Instruction::FSub: return Instruction::Sub;
83	case Instruction::FMul: return Instruction::Mul;
84	}
85	}
86
87	// Find the roots - instructions that convert from the FP domain to
88	// integer domain.
89	void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) {
90	for (BasicBlock &BB : F) {
91	// Unreachable code can take on strange forms that we are not prepared to
92	// handle. For example, an instruction may have itself as an operand.
93	if (!DT.isReachableFromEntry(A: &BB))
94	continue;
95
96	for (Instruction &I : BB) {
97	if (isa<VectorType>(Val: I.getType()))
98	continue;
99	switch (I.getOpcode()) {
100	default: break;
101	case Instruction::FPToUI:
102	case Instruction::FPToSI:
103	Roots.insert(X: &I);
104	break;
105	case Instruction::FCmp:
106	if (mapFCmpPred(P: cast<CmpInst>(Val: &I)->getPredicate()) !=
107	CmpInst::BAD_ICMP_PREDICATE)
108	Roots.insert(X: &I);
109	break;
110	}
111	}
112	}
113	}
114
115	// Helper - mark I as having been traversed, having range R.
116	void Float2IntPass::seen(Instruction *I, ConstantRange R) {
117	LLVM_DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
118	SeenInsts.insert_or_assign(Key: I, Val: std::move(R));
119	}
120
121	// Helper - get a range representing a poison value.
122	ConstantRange Float2IntPass::badRange() {
123	return ConstantRange::getFull(BitWidth: MaxIntegerBW + `1`);
124	}
125	ConstantRange Float2IntPass::unknownRange() {
126	return ConstantRange::getEmpty(BitWidth: MaxIntegerBW + `1`);
127	}
128	ConstantRange Float2IntPass::validateRange(ConstantRange R) {
129	if (R.getBitWidth() > MaxIntegerBW + `1`)
130	return badRange();
131	return R;
132	}
133
134	// The most obvious way to structure the search is a depth-first, eager
135	// search from each root. However, that requires direct recursion and so
136	// can only handle small instruction sequences. Instead, we split the search
137	// up into two phases:
138	// - walkBackwards: A breadth-first walk of the use-def graph starting from
139	// the roots. Populate "SeenInsts" with interesting
140	// instructions and poison values if they're obvious and
141	// cheap to compute. Calculate the equivalence set structure
142	// while we're here too.
143	// - walkForwards: Iterate over SeenInsts in reverse order, so we visit
144	// defs before their uses. Calculate the real range info.
145
146	// Breadth-first walk of the use-def graph; determine the set of nodes
147	// we care about and eagerly determine if some of them are poisonous.
148	void Float2IntPass::walkBackwards() {
149	std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
150	while (!Worklist.empty()) {
151	Instruction *I = Worklist.back();
152	Worklist.pop_back();
153
154	if (SeenInsts.contains(Key: I))
155	// Seen already.
156	continue;
157
158	switch (I->getOpcode()) {
159	// FIXME: Handle select and phi nodes.
160	default:
161	// Path terminated uncleanly.
162	seen(I, R: badRange());
163	break;
164
165	case Instruction::UIToFP:
166	case Instruction::SIToFP: {
167	// Path terminated cleanly - use the type of the integer input to seed
168	// the analysis.
169	unsigned BW = I->getOperand(i: `0`)->getType()->getPrimitiveSizeInBits();
170	auto Input = ConstantRange::getFull(BitWidth: BW);
171	auto CastOp = (Instruction::CastOps)I->getOpcode();
172	seen(I, R: validateRange(R: Input.castOp(CastOp, BitWidth: MaxIntegerBW+`1`)));
173	continue;
174	}
175
176	case Instruction::FNeg:
177	case Instruction::FAdd:
178	case Instruction::FSub:
179	case Instruction::FMul:
180	case Instruction::FPToUI:
181	case Instruction::FPToSI:
182	case Instruction::FCmp:
183	seen(I, R: unknownRange());
184	break;
185	}
186
187	for (Value *O : I->operands()) {
188	if (Instruction *OI = dyn_cast<Instruction>(Val: O)) {
189	// Unify def-use chains if they interfere.
190	ECs.unionSets(V1: I, V2: OI);
191	if (SeenInsts.find(Key: I)->second != badRange())
192	Worklist.push_back(x: OI);
193	} else if (!isa<ConstantFP>(Val: O)) {
194	// Not an instruction or ConstantFP? we can't do anything.
195	seen(I, R: badRange());
196	}
197	}
198	}
199	}
200
201	// Calculate result range from operand ranges.
202	// Return std::nullopt if the range cannot be calculated yet.
203	std::optional<ConstantRange> Float2IntPass::calcRange(Instruction *I) {
204	SmallVector<ConstantRange, `4`> OpRanges;
205	for (Value *O : I->operands()) {
206	if (Instruction *OI = dyn_cast<Instruction>(Val: O)) {
207	auto OpIt = SeenInsts.find(Key: OI);
208	assert(OpIt != SeenInsts.end() && "def not seen before use!");
209	if (OpIt->second == unknownRange())
210	return std::nullopt; // Wait until operand range has been calculated.
211	OpRanges.push_back(Elt: OpIt->second);
212	} else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: O)) {
213	// Work out if the floating point number can be losslessly represented
214	// as an integer.
215	// APFloat::convertToInteger(&Exact) purports to do what we want, but
216	// the exactness can be too precise. For example, negative zero can
217	// never be exactly converted to an integer.
218	//
219	// Instead, we ask APFloat to round itself to an integral value - this
220	// preserves sign-of-zero - then compare the result with the original.
221	//
222	const APFloat &F = CF->getValueAPF();
223
224	// First, weed out obviously incorrect values. Non-finite numbers
225	// can't be represented and neither can negative zero, unless
226	// we're in fast math mode.
227	if (!F.isFinite() \|\|
228	(F.isZero() && F.isNegative() && isa<FPMathOperator>(Val: I) &&
229	!I->hasNoSignedZeros()))
230	return badRange();
231
232	APFloat NewF = F;
233	auto Res = NewF.roundToIntegral(RM: APFloat::rmNearestTiesToEven);
234	if (Res != APFloat::opOK \|\| NewF != F)
235	return badRange();
236
237	// OK, it's representable. Now get it.
238	APSInt Int(MaxIntegerBW+`1`, false);
239	bool Exact;
240	CF->getValueAPF().convertToInteger(Result&: Int,
241	RM: APFloat::rmNearestTiesToEven,
242	IsExact: &Exact);
243	OpRanges.push_back(Elt: ConstantRange (Int));
244	} else {
245	llvm_unreachable("Should have already marked this as badRange!");
246	}
247	}
248
249	switch (I->getOpcode()) {
250	// FIXME: Handle select and phi nodes.
251	default:
252	case Instruction::UIToFP:
253	case Instruction::SIToFP:
254	llvm_unreachable("Should have been handled in walkForwards!");
255
256	case Instruction::FNeg: {
257	assert(OpRanges.size() == `1` && "FNeg is a unary operator!");
258	unsigned Size = OpRanges [`0`].getBitWidth();
259	auto Zero = ConstantRange (APInt::getZero(numBits: Size));
260	return Zero.sub(Other: OpRanges [`0`]);
261	}
262
263	case Instruction::FAdd:
264	case Instruction::FSub:
265	case Instruction::FMul: {
266	assert(OpRanges.size() == `2` && "its a binary operator!");
267	auto BinOp = (Instruction::BinaryOps) I->getOpcode();
268	return OpRanges [`0`].binaryOp(BinOp, Other: OpRanges [`1`]);
269	}
270
271	//
272	// Root-only instructions - we'll only see these if they're the
273	// first node in a walk.
274	//
275	case Instruction::FPToUI:
276	case Instruction::FPToSI: {
277	assert(OpRanges.size() == `1` && "FPTo[US]I is a unary operator!");
278	// Note: We're ignoring the casts output size here as that's what the
279	// caller expects.
280	auto CastOp = (Instruction::CastOps)I->getOpcode();
281	return OpRanges [`0`].castOp(CastOp, BitWidth: MaxIntegerBW+`1`);
282	}
283
284	case Instruction::FCmp:
285	assert(OpRanges.size() == `2` && "FCmp is a binary operator!");
286	return OpRanges [`0`].unionWith(CR: OpRanges [`1`]);
287	}
288	}
289
290	// Walk forwards down the list of seen instructions, so we visit defs before
291	// uses.
292	void Float2IntPass::walkForwards() {
293	std::deque<Instruction *> Worklist;
294	for (const auto &Pair : SeenInsts)
295	if (Pair.second == unknownRange())
296	Worklist.push_back(x: Pair.first);
297
298	while (!Worklist.empty()) {
299	Instruction *I = Worklist.back();
300	Worklist.pop_back();
301
302	if (std::optional<ConstantRange> Range = calcRange(I))
303	seen(I, R: *Range);
304	else
305	Worklist.push_front(x: I); // Reprocess later.
306	}
307	}
308
309	// If there is a valid transform to be done, do it.
310	bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
311	bool MadeChange = false;
312
313	// Iterate over every disjoint partition of the def-use graph.
314	for (const auto &E : ECs) {
315	if (!E->isLeader())
316	continue;
317
318	ConstantRange R(MaxIntegerBW + `1`, false);
319	bool Fail = false;
320	Type ConvertedToTy = nullptr*;
321
322	// For every member of the partition, union all the ranges together.
323	for (Instruction I : ECs.members(ECV: E)) {
324	auto *SeenI = SeenInsts.find(Key: I);
325	if (SeenI == SeenInsts.end())
326	continue;
327
328	R = R.unionWith(CR: SeenI->second);
329	// We need to ensure I has no users that have not been seen.
330	// If it does, transformation would be illegal.
331	//
332	// Don't count the roots, as they terminate the graphs.
333	if (!Roots.contains(key: I)) {
334	// Set the type of the conversion while we're here.
335	if (!ConvertedToTy)
336	ConvertedToTy = I->getType();
337	for (User *U : I->users()) {
338	Instruction *UI = dyn_cast<Instruction>(Val: U);
339	if (!UI \|\| !SeenInsts.contains(Key: UI)) {
340	LLVM_DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n");
341	Fail = true;
342	break;
343	}
344	}
345	}
346	if (Fail)
347	break;
348	}
349
350	// If the set was empty, or we failed, or the range is poisonous,
351	// bail out.
352	if (ECs.member_begin(ECV: *E) == ECs.member_end() \|\| Fail \|\| R.isFullSet() \|\|
353	R.isSignWrappedSet())
354	continue;
355	assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
356
357	// The number of bits required is the maximum of the upper and
358	// lower limits, plus one so it can be signed.
359	unsigned MinBW = R.getMinSignedBits() + `1`;
360	LLVM_DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
361
362	// If we've run off the realms of the exactly representable integers,
363	// the floating point result will differ from an integer approximation.
364
365	// Do we need more bits than are in the mantissa of the type we converted
366	// to? semanticsPrecision returns the number of mantissa bits plus one
367	// for the sign bit.
368	unsigned MaxRepresentableBits
369	= APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - `1`;
370	if (MinBW > MaxRepresentableBits) {
371	LLVM_DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
372	continue;
373	}
374
375	// OK, R is known to be representable.
376	// Pick the smallest legal type that will fit.
377	Type Ty = DL.getSmallestLegalIntType(C&: Ctx, Width: MinBW);
378	if (!Ty) {
379	// Every supported target supports 64-bit and 32-bit integers,
380	// so fallback to a 32 or 64-bit integer if the value fits.
381	if (MinBW <= `32`) {
382	Ty = Type::getInt32Ty(C&: *Ctx);
383	} else if (MinBW <= `64`) {
384	Ty = Type::getInt64Ty(C&: *Ctx);
385	} else {
386	LLVM_DEBUG(dbgs() << "F2I: Value requires more bits to represent than "
387	"the target supports!\n");
388	continue;
389	}
390	}
391
392	for (Instruction I : ECs.members(ECV: E))
393	convert(I, ToTy: Ty);
394	MadeChange = true;
395	}
396
397	return MadeChange;
398	}
399
400	Value Float2IntPass::convert(Instruction I, Type *ToTy) {
401	if (auto It = ConvertedInsts.find(Key: I); It != ConvertedInsts.end())
402	// Already converted this instruction.
403	return It->second;
404
405	SmallVector<Value*,`4`> NewOperands;
406	for (Value *V : I->operands()) {
407	// Don't recurse if we're an instruction that terminates the path.
408	if (I->getOpcode() == Instruction::UIToFP \|\|
409	I->getOpcode() == Instruction::SIToFP) {
410	NewOperands.push_back(Elt: V);
411	} else if (Instruction *VI = dyn_cast<Instruction>(Val: V)) {
412	NewOperands.push_back(Elt: convert(I: VI, ToTy));
413	} else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: V)) {
414	APSInt Val(ToTy->getPrimitiveSizeInBits(), /isUnsigned=/false);
415	bool Exact;
416	CF->getValueAPF().convertToInteger(Result&: Val,
417	RM: APFloat::rmNearestTiesToEven,
418	IsExact: &Exact);
419	NewOperands.push_back(Elt: ConstantInt::get(Ty: ToTy, V: Val));
420	} else {
421	llvm_unreachable("Unhandled operand type?");
422	}
423	}
424
425	// Now create a new instruction.
426	IRBuilder<> IRB(I);
427	Value NewV = nullptr*;
428	switch (I->getOpcode()) {
429	default: llvm_unreachable("Unhandled instruction!");
430
431	case Instruction::FPToUI:
432	NewV = IRB.CreateZExtOrTrunc(V: NewOperands [`0`], DestTy: I->getType());
433	break;
434
435	case Instruction::FPToSI:
436	NewV = IRB.CreateSExtOrTrunc(V: NewOperands [`0`], DestTy: I->getType());
437	break;
438
439	case Instruction::FCmp: {
440	CmpInst::Predicate P = mapFCmpPred(P: cast<CmpInst>(Val: I)->getPredicate());
441	assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!");
442	NewV = IRB.CreateICmp(P, LHS: NewOperands [`0`], RHS: NewOperands [`1`], Name: I->getName());
443	break;
444	}
445
446	case Instruction::UIToFP:
447	NewV = IRB.CreateZExtOrTrunc(V: NewOperands [`0`], DestTy: ToTy);
448	break;
449
450	case Instruction::SIToFP:
451	NewV = IRB.CreateSExtOrTrunc(V: NewOperands [`0`], DestTy: ToTy);
452	break;
453
454	case Instruction::FNeg:
455	NewV = IRB.CreateNeg(V: NewOperands [`0`], Name: I->getName());
456	break;
457
458	case Instruction::FAdd:
459	case Instruction::FSub:
460	case Instruction::FMul:
461	NewV = IRB.CreateBinOp(Opc: mapBinOpcode(Opcode: I->getOpcode()),
462	LHS: NewOperands [`0`], RHS: NewOperands [`1`],
463	Name: I->getName());
464	break;
465	}
466
467	// If we're a root instruction, RAUW.
468	if (Roots.count(key: I))
469	I->replaceAllUsesWith(V: NewV);
470
471	ConvertedInsts [I] = NewV;
472	return NewV;
473	}
474
475	// Perform dead code elimination on the instructions we just modified.
476	void Float2IntPass::cleanup() {
477	for (auto &I : reverse(C&: ConvertedInsts))
478	I.first->eraseFromParent();
479	}
480
481	bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
482	LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");
483	// Clear out all state.
484	ECs = EquivalenceClasses<Instruction*>();
485	SeenInsts.clear();
486	ConvertedInsts.clear();
487	Roots.clear();
488
489	Ctx = &F.getParent()->getContext();
490
491	findRoots(F, DT);
492
493	walkBackwards();
494	walkForwards();
495
496	const DataLayout &DL = F.getDataLayout();
497	bool Modified = validateAndTransform(DL);
498	if (Modified)
499	cleanup();
500	return Modified;
501	}
502
503	PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) {
504	const DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
505	if (!runImpl(F, DT))
506	return PreservedAnalyses::all();
507
508	PreservedAnalyses PA;
509	PA.preserveSet<CFGAnalyses>();
510	return PA;
511	}
512

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/Float2Int.cpp