1//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines routines for folding instructions into constants.
10//
11// Also, to supplement the basic IR ConstantExpr simplifications,
12// this file defines some additional folding routines that can make use of
13// DataLayout information. These functions cannot go in IR due to library
14// dependency issues.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/Analysis/ConstantFolding.h"
19#include "llvm/ADT/APFloat.h"
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/APSInt.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/Analysis/TargetFolder.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/Analysis/VectorUtils.h"
31#include "llvm/Config/config.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/ConstantFold.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/GlobalVariable.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
42#include "llvm/IR/Instructions.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/Intrinsics.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/IntrinsicsAMDGPU.h"
47#include "llvm/IR/IntrinsicsARM.h"
48#include "llvm/IR/IntrinsicsNVPTX.h"
49#include "llvm/IR/IntrinsicsWebAssembly.h"
50#include "llvm/IR/IntrinsicsX86.h"
51#include "llvm/IR/NVVMIntrinsicUtils.h"
52#include "llvm/IR/Operator.h"
53#include "llvm/IR/Type.h"
54#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/MathExtras.h"
59#include <cassert>
60#include <cerrno>
61#include <cfenv>
62#include <cmath>
63#include <cstdint>
64
65using namespace llvm;
66
67static cl::opt<bool> DisableFPCallFolding(
68 "disable-fp-call-folding",
69 cl::desc("Disable constant-folding of FP intrinsics and libcalls."),
70 cl::init(Val: false), cl::Hidden);
71
72namespace {
73
74//===----------------------------------------------------------------------===//
75// Constant Folding internal helper functions
76//===----------------------------------------------------------------------===//
77
78static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
79 Constant *C, Type *SrcEltTy,
80 unsigned NumSrcElts,
81 const DataLayout &DL) {
82 // Now that we know that the input value is a vector of integers, just shift
83 // and insert them into our result.
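  // For example, on a little-endian target, <2 x i32> <i32 1, i32 2> produces
  // the 64-bit value 0x0000000200000001 (element 0 ends up in the low bits).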
84 unsigned BitShift = DL.getTypeSizeInBits(Ty: SrcEltTy);
85 for (unsigned i = 0; i != NumSrcElts; ++i) {
86 Constant *Element;
87 if (DL.isLittleEndian())
88 Element = C->getAggregateElement(Elt: NumSrcElts - i - 1);
89 else
90 Element = C->getAggregateElement(Elt: i);
91
92 if (isa_and_nonnull<UndefValue>(Val: Element)) {
93 Result <<= BitShift;
94 continue;
95 }
96
97 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Val: Element);
98 if (!ElementCI)
99 return ConstantExpr::getBitCast(C, Ty: DestTy);
100
101 Result <<= BitShift;
102 Result |= ElementCI->getValue().zext(width: Result.getBitWidth());
103 }
104
105 return nullptr;
106}
107
108/// Constant fold bitcast, symbolically evaluating it with DataLayout.
109/// This always returns a non-null constant, but it may be a
110/// ConstantExpr if unfoldable.
111Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
112 assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
113 "Invalid constantexpr bitcast!");
114
115 // Catch the obvious splat cases.
116 if (Constant *Res = ConstantFoldLoadFromUniformValue(C, Ty: DestTy, DL))
117 return Res;
118
119 if (auto *VTy = dyn_cast<VectorType>(Val: C->getType())) {
120 // Handle a vector->scalar integer/fp cast.
121 if (isa<IntegerType>(Val: DestTy) || DestTy->isFloatingPointTy()) {
122 unsigned NumSrcElts = cast<FixedVectorType>(Val: VTy)->getNumElements();
123 Type *SrcEltTy = VTy->getElementType();
124
      // If the vector is a vector of floating-point values, convert it to a
      // vector of integers to simplify things.
127 if (SrcEltTy->isFloatingPointTy()) {
128 unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
129 auto *SrcIVTy = FixedVectorType::get(
130 ElementType: IntegerType::get(C&: C->getContext(), NumBits: FPWidth), NumElts: NumSrcElts);
131 // Ask IR to do the conversion now that #elts line up.
132 C = ConstantExpr::getBitCast(C, Ty: SrcIVTy);
133 }
134
135 APInt Result(DL.getTypeSizeInBits(Ty: DestTy), 0);
136 if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
137 SrcEltTy, NumSrcElts, DL))
138 return CE;
139
140 if (isa<IntegerType>(Val: DestTy))
141 return ConstantInt::get(Ty: DestTy, V: Result);
142
143 APFloat FP(DestTy->getFltSemantics(), Result);
144 return ConstantFP::get(Context&: DestTy->getContext(), V: FP);
145 }
146 }
147
148 // The code below only handles casts to vectors currently.
149 auto *DestVTy = dyn_cast<VectorType>(Val: DestTy);
150 if (!DestVTy)
151 return ConstantExpr::getBitCast(C, Ty: DestTy);
152
153 // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
154 // vector so the code below can handle it uniformly.
155 if (!isa<VectorType>(Val: C->getType()) &&
156 (isa<ConstantFP>(Val: C) || isa<ConstantInt>(Val: C))) {
157 Constant *Ops = C; // don't take the address of C!
158 return FoldBitCast(C: ConstantVector::get(V: Ops), DestTy, DL);
159 }
160
  // Some of what follows may extend to cover scalable vectors, but the current
  // implementation is fixed-length specific.
163 if (!isa<FixedVectorType>(Val: C->getType()))
164 return ConstantExpr::getBitCast(C, Ty: DestTy);
165
166 // If this is a bitcast from constant vector -> vector, fold it.
167 if (!isa<ConstantDataVector>(Val: C) && !isa<ConstantVector>(Val: C) &&
168 !isa<ConstantInt>(Val: C) && !isa<ConstantFP>(Val: C))
169 return ConstantExpr::getBitCast(C, Ty: DestTy);
170
171 // If the element types match, IR can fold it.
172 unsigned NumDstElt = cast<FixedVectorType>(Val: DestVTy)->getNumElements();
173 unsigned NumSrcElt = cast<FixedVectorType>(Val: C->getType())->getNumElements();
174 if (NumDstElt == NumSrcElt)
175 return ConstantExpr::getBitCast(C, Ty: DestTy);
176
177 Type *SrcEltTy = cast<VectorType>(Val: C->getType())->getElementType();
178 Type *DstEltTy = DestVTy->getElementType();
179
180 // Otherwise, we're changing the number of elements in a vector, which
181 // requires endianness information to do the right thing. For example,
182 // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
183 // folds to (little endian):
184 // <4 x i32> <i32 0, i32 0, i32 1, i32 0>
185 // and to (big endian):
186 // <4 x i32> <i32 0, i32 0, i32 0, i32 1>
187
  // First things first: we only want to think about integers here, so if we
  // have something in FP form, recast it as an integer.
190 if (DstEltTy->isFloatingPointTy()) {
    // Fold to a vector of integers with the same size as our FP type.
192 unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
193 auto *DestIVTy = FixedVectorType::get(
194 ElementType: IntegerType::get(C&: C->getContext(), NumBits: FPWidth), NumElts: NumDstElt);
195 // Recursively handle this integer conversion, if possible.
196 C = FoldBitCast(C, DestTy: DestIVTy, DL);
197
198 // Finally, IR can handle this now that #elts line up.
199 return ConstantExpr::getBitCast(C, Ty: DestTy);
200 }
201
  // Okay, we know the destination is an integer vector; if the input is FP,
  // convert it to integer first.
204 if (SrcEltTy->isFloatingPointTy()) {
205 unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
206 auto *SrcIVTy = FixedVectorType::get(
207 ElementType: IntegerType::get(C&: C->getContext(), NumBits: FPWidth), NumElts: NumSrcElt);
208 // Ask IR to do the conversion now that #elts line up.
209 C = ConstantExpr::getBitCast(C, Ty: SrcIVTy);
210 assert((isa<ConstantVector>(C) || // FIXME: Remove ConstantVector.
211 isa<ConstantDataVector>(C) || isa<ConstantInt>(C)) &&
212 "Constant folding cannot fail for plain fp->int bitcast!");
213 }
214
215 // Now we know that the input and output vectors are both integer vectors
216 // of the same size, and that their #elements is not the same. Do the
217 // conversion here, which depends on whether the input or output has
218 // more elements.
219 bool isLittleEndian = DL.isLittleEndian();
220
221 SmallVector<Constant*, 32> Result;
222 if (NumDstElt < NumSrcElt) {
223 // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
224 Constant *Zero = Constant::getNullValue(Ty: DstEltTy);
225 unsigned Ratio = NumSrcElt/NumDstElt;
226 unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
227 unsigned SrcElt = 0;
228 for (unsigned i = 0; i != NumDstElt; ++i) {
229 // Build each element of the result.
230 Constant *Elt = Zero;
231 unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
232 for (unsigned j = 0; j != Ratio; ++j) {
233 Constant *Src = C->getAggregateElement(Elt: SrcElt++);
234 if (isa_and_nonnull<UndefValue>(Val: Src))
235 Src = Constant::getNullValue(
236 Ty: cast<VectorType>(Val: C->getType())->getElementType());
237 else
238 Src = dyn_cast_or_null<ConstantInt>(Val: Src);
239 if (!Src) // Reject constantexpr elements.
240 return ConstantExpr::getBitCast(C, Ty: DestTy);
241
242 // Zero extend the element to the right size.
243 Src = ConstantFoldCastOperand(Opcode: Instruction::ZExt, C: Src, DestTy: Elt->getType(),
244 DL);
245 assert(Src && "Constant folding cannot fail on plain integers");
246
247 // Shift it to the right place, depending on endianness.
248 Src = ConstantFoldBinaryOpOperands(
249 Opcode: Instruction::Shl, LHS: Src, RHS: ConstantInt::get(Ty: Src->getType(), V: ShiftAmt),
250 DL);
251 assert(Src && "Constant folding cannot fail on plain integers");
252
253 ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
254
255 // Mix it in.
256 Elt = ConstantFoldBinaryOpOperands(Opcode: Instruction::Or, LHS: Elt, RHS: Src, DL);
257 assert(Elt && "Constant folding cannot fail on plain integers");
258 }
259 Result.push_back(Elt);
260 }
261 return ConstantVector::get(V: Result);
262 }
263
264 // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
265 unsigned Ratio = NumDstElt/NumSrcElt;
266 unsigned DstBitSize = DL.getTypeSizeInBits(Ty: DstEltTy);
267
268 // Loop over each source value, expanding into multiple results.
269 for (unsigned i = 0; i != NumSrcElt; ++i) {
270 auto *Element = C->getAggregateElement(Elt: i);
271
272 if (!Element) // Reject constantexpr elements.
273 return ConstantExpr::getBitCast(C, Ty: DestTy);
274
275 if (isa<UndefValue>(Val: Element)) {
      // Correctly propagate undef values.
277 Result.append(NumInputs: Ratio, Elt: UndefValue::get(T: DstEltTy));
278 continue;
279 }
280
281 auto *Src = dyn_cast<ConstantInt>(Val: Element);
282 if (!Src)
283 return ConstantExpr::getBitCast(C, Ty: DestTy);
284
285 unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
286 for (unsigned j = 0; j != Ratio; ++j) {
287 // Shift the piece of the value into the right place, depending on
288 // endianness.
289 APInt Elt = Src->getValue().lshr(shiftAmt: ShiftAmt);
290 ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
291
292 // Truncate and remember this piece.
293 Result.push_back(Elt: ConstantInt::get(Ty: DstEltTy, V: Elt.trunc(width: DstBitSize)));
294 }
295 }
296
297 return ConstantVector::get(V: Result);
298}
299
300} // end anonymous namespace
301
/// If this constant is a constant offset from a global, return the global and
/// the offset. Because of ConstantExprs, this function is recursive.
304bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
305 APInt &Offset, const DataLayout &DL,
306 DSOLocalEquivalent **DSOEquiv) {
307 if (DSOEquiv)
308 *DSOEquiv = nullptr;
309
310 // Trivial case, constant is the global.
311 if ((GV = dyn_cast<GlobalValue>(Val: C))) {
312 unsigned BitWidth = DL.getIndexTypeSizeInBits(Ty: GV->getType());
313 Offset = APInt(BitWidth, 0);
314 return true;
315 }
316
317 if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(Val: C)) {
318 if (DSOEquiv)
319 *DSOEquiv = FoundDSOEquiv;
320 GV = FoundDSOEquiv->getGlobalValue();
321 unsigned BitWidth = DL.getIndexTypeSizeInBits(Ty: GV->getType());
322 Offset = APInt(BitWidth, 0);
323 return true;
324 }
325
326 // Otherwise, if this isn't a constant expr, bail out.
327 auto *CE = dyn_cast<ConstantExpr>(Val: C);
328 if (!CE) return false;
329
330 // Look through ptr->int and ptr->ptr casts.
331 if (CE->getOpcode() == Instruction::PtrToInt ||
332 CE->getOpcode() == Instruction::PtrToAddr ||
333 CE->getOpcode() == Instruction::BitCast)
334 return IsConstantOffsetFromGlobal(C: CE->getOperand(i_nocapture: 0), GV, Offset, DL,
335 DSOEquiv);
336
337 // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
338 auto *GEP = dyn_cast<GEPOperator>(Val: CE);
339 if (!GEP)
340 return false;
341
342 unsigned BitWidth = DL.getIndexTypeSizeInBits(Ty: GEP->getType());
343 APInt TmpOffset(BitWidth, 0);
344
345 // If the base isn't a global+constant, we aren't either.
346 if (!IsConstantOffsetFromGlobal(C: CE->getOperand(i_nocapture: 0), GV, Offset&: TmpOffset, DL,
347 DSOEquiv))
348 return false;
349
350 // Otherwise, add any offset that our operands provide.
351 if (!GEP->accumulateConstantOffset(DL, Offset&: TmpOffset))
352 return false;
353
354 Offset = TmpOffset;
355 return true;
356}
357
358Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
359 const DataLayout &DL) {
360 do {
361 Type *SrcTy = C->getType();
362 if (SrcTy == DestTy)
363 return C;
364
365 TypeSize DestSize = DL.getTypeSizeInBits(Ty: DestTy);
366 TypeSize SrcSize = DL.getTypeSizeInBits(Ty: SrcTy);
367 if (!TypeSize::isKnownGE(LHS: SrcSize, RHS: DestSize))
368 return nullptr;
369
370 // Catch the obvious splat cases (since all-zeros can coerce non-integral
371 // pointers legally).
372 if (Constant *Res = ConstantFoldLoadFromUniformValue(C, Ty: DestTy, DL))
373 return Res;
374
375 // If the type sizes are the same and a cast is legal, just directly
376 // cast the constant.
377 // But be careful not to coerce non-integral pointers illegally.
378 if (SrcSize == DestSize &&
379 DL.isNonIntegralPointerType(Ty: SrcTy->getScalarType()) ==
380 DL.isNonIntegralPointerType(Ty: DestTy->getScalarType())) {
381 Instruction::CastOps Cast = Instruction::BitCast;
382 // If we are going from a pointer to int or vice versa, we spell the cast
383 // differently.
384 if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
385 Cast = Instruction::IntToPtr;
386 else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
387 Cast = Instruction::PtrToInt;
388
389 if (CastInst::castIsValid(op: Cast, S: C, DstTy: DestTy))
390 return ConstantFoldCastOperand(Opcode: Cast, C, DestTy, DL);
391 }
392
393 // If this isn't an aggregate type, there is nothing we can do to drill down
394 // and find a bitcastable constant.
395 if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
396 return nullptr;
397
398 // We're simulating a load through a pointer that was bitcast to point to
399 // a different type, so we can try to walk down through the initial
400 // elements of an aggregate to see if some part of the aggregate is
401 // castable to implement the "load" semantic model.
402 if (SrcTy->isStructTy()) {
403 // Struct types might have leading zero-length elements like [0 x i32],
404 // which are certainly not what we are looking for, so skip them.
405 unsigned Elem = 0;
406 Constant *ElemC;
407 do {
408 ElemC = C->getAggregateElement(Elt: Elem++);
409 } while (ElemC && DL.getTypeSizeInBits(Ty: ElemC->getType()).isZero());
410 C = ElemC;
411 } else {
412 // For non-byte-sized vector elements, the first element is not
413 // necessarily located at the vector base address.
414 if (auto *VT = dyn_cast<VectorType>(Val: SrcTy))
415 if (!DL.typeSizeEqualsStoreSize(Ty: VT->getElementType()))
416 return nullptr;
417
418 C = C->getAggregateElement(Elt: 0u);
419 }
420 } while (C);
421
422 return nullptr;
423}
424
425namespace {
426
/// Recursive helper to read bits out of a global. C is the constant being copied
428/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
429/// results into and BytesLeft is the number of bytes left in
430/// the CurPtr buffer. DL is the DataLayout.
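/// Returns true if the requested bytes were successfully copied into CurPtr,
/// and false if some part of the initializer could not be byte-serialized.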
431bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
432 unsigned BytesLeft, const DataLayout &DL) {
433 assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
434 "Out of range access");
435
  // Reading from type padding; the buffer is already zero, so we are done.
437 if (ByteOffset >= DL.getTypeStoreSize(Ty: C->getType()))
438 return true;
439
440 // If this element is zero or undefined, we can just return since *CurPtr is
441 // zero initialized.
442 if (isa<ConstantAggregateZero>(Val: C) || isa<UndefValue>(Val: C))
443 return true;
444
445 if (auto *CI = dyn_cast<ConstantInt>(Val: C)) {
446 if ((CI->getBitWidth() & 7) != 0)
447 return false;
448 const APInt &Val = CI->getValue();
449 unsigned IntBytes = unsigned(CI->getBitWidth()/8);
450
451 for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
452 unsigned n = ByteOffset;
453 if (!DL.isLittleEndian())
454 n = IntBytes - n - 1;
455 CurPtr[i] = Val.extractBits(numBits: 8, bitPosition: n * 8).getZExtValue();
456 ++ByteOffset;
457 }
458 return true;
459 }
460
461 if (auto *CFP = dyn_cast<ConstantFP>(Val: C)) {
462 if (CFP->getType()->isDoubleTy()) {
463 C = FoldBitCast(C, DestTy: Type::getInt64Ty(C&: C->getContext()), DL);
464 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
465 }
466 if (CFP->getType()->isFloatTy()){
467 C = FoldBitCast(C, DestTy: Type::getInt32Ty(C&: C->getContext()), DL);
468 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
469 }
470 if (CFP->getType()->isHalfTy()){
471 C = FoldBitCast(C, DestTy: Type::getInt16Ty(C&: C->getContext()), DL);
472 return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
473 }
474 return false;
475 }
476
477 if (auto *CS = dyn_cast<ConstantStruct>(Val: C)) {
478 const StructLayout *SL = DL.getStructLayout(Ty: CS->getType());
479 unsigned Index = SL->getElementContainingOffset(FixedOffset: ByteOffset);
480 uint64_t CurEltOffset = SL->getElementOffset(Idx: Index);
481 ByteOffset -= CurEltOffset;
482
483 while (true) {
484 // If the element access is to the element itself and not to tail padding,
485 // read the bytes from the element.
486 uint64_t EltSize = DL.getTypeAllocSize(Ty: CS->getOperand(i_nocapture: Index)->getType());
487
488 if (ByteOffset < EltSize &&
489 !ReadDataFromGlobal(C: CS->getOperand(i_nocapture: Index), ByteOffset, CurPtr,
490 BytesLeft, DL))
491 return false;
492
493 ++Index;
494
      // Check to see if we read from the last struct element; if so, we're done.
496 if (Index == CS->getType()->getNumElements())
497 return true;
498
499 // If we read all of the bytes we needed from this element we're done.
500 uint64_t NextEltOffset = SL->getElementOffset(Idx: Index);
501
502 if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
503 return true;
504
505 // Move to the next element of the struct.
506 CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
507 BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
508 ByteOffset = 0;
509 CurEltOffset = NextEltOffset;
510 }
511 // not reached.
512 }
513
514 if (isa<ConstantArray>(Val: C) || isa<ConstantVector>(Val: C) ||
515 isa<ConstantDataSequential>(Val: C)) {
516 uint64_t NumElts, EltSize;
517 Type *EltTy;
518 if (auto *AT = dyn_cast<ArrayType>(Val: C->getType())) {
519 NumElts = AT->getNumElements();
520 EltTy = AT->getElementType();
521 EltSize = DL.getTypeAllocSize(Ty: EltTy);
522 } else {
523 NumElts = cast<FixedVectorType>(Val: C->getType())->getNumElements();
524 EltTy = cast<FixedVectorType>(Val: C->getType())->getElementType();
      // TODO: For non-byte-sized vectors, the current implementation assumes
      // there is padding to the next byte boundary between elements.
527 if (!DL.typeSizeEqualsStoreSize(Ty: EltTy))
528 return false;
529
530 EltSize = DL.getTypeStoreSize(Ty: EltTy);
531 }
532 uint64_t Index = ByteOffset / EltSize;
533 uint64_t Offset = ByteOffset - Index * EltSize;
534
535 for (; Index != NumElts; ++Index) {
536 if (!ReadDataFromGlobal(C: C->getAggregateElement(Elt: Index), ByteOffset: Offset, CurPtr,
537 BytesLeft, DL))
538 return false;
539
540 uint64_t BytesWritten = EltSize - Offset;
541 assert(BytesWritten <= EltSize && "Not indexing into this element?");
542 if (BytesWritten >= BytesLeft)
543 return true;
544
545 Offset = 0;
546 BytesLeft -= BytesWritten;
547 CurPtr += BytesWritten;
548 }
549 return true;
550 }
551
552 if (auto *CE = dyn_cast<ConstantExpr>(Val: C)) {
553 if (CE->getOpcode() == Instruction::IntToPtr &&
554 CE->getOperand(i_nocapture: 0)->getType() == DL.getIntPtrType(CE->getType())) {
555 return ReadDataFromGlobal(C: CE->getOperand(i_nocapture: 0), ByteOffset, CurPtr,
556 BytesLeft, DL);
557 }
558 }
559
560 // Otherwise, unknown initializer type.
561 return false;
562}
563
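/// Attempt to fold a load of LoadTy from the constant C at byte offset Offset
/// by serializing the relevant bytes of C (via ReadDataFromGlobal) and
/// reassembling them according to the target's endianness. Non-integer load
/// types are handled by folding an equivalently sized integer load and then
/// bitcasting the result. Returns nullptr on failure.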
564Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
565 int64_t Offset, const DataLayout &DL) {
  // Bail out early. We do not expect to load from a scalable global variable.
567 if (isa<ScalableVectorType>(Val: LoadTy))
568 return nullptr;
569
570 auto *IntType = dyn_cast<IntegerType>(Val: LoadTy);
571
572 // If this isn't an integer load we can't fold it directly.
573 if (!IntType) {
574 // If this is a non-integer load, we can try folding it as an int load and
575 // then bitcast the result. This can be useful for union cases. Note
  // that address spaces don't matter here since we are not producing an
  // actual new load.
578 if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
579 !LoadTy->isVectorTy())
580 return nullptr;
581
582 Type *MapTy = Type::getIntNTy(C&: C->getContext(),
583 N: DL.getTypeSizeInBits(Ty: LoadTy).getFixedValue());
584 if (Constant *Res = FoldReinterpretLoadFromConst(C, LoadTy: MapTy, Offset, DL)) {
585 if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
586 // Materializing a zero can be done trivially without a bitcast
587 return Constant::getNullValue(Ty: LoadTy);
588 Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
589 Res = FoldBitCast(C: Res, DestTy: CastTy, DL);
590 if (LoadTy->isPtrOrPtrVectorTy()) {
        // For a vector of pointers, we first convert to a vector of integers
        // and then do a vector inttoptr.
592 if (Res->isNullValue() && !LoadTy->isX86_AMXTy())
593 return Constant::getNullValue(Ty: LoadTy);
594 if (DL.isNonIntegralPointerType(Ty: LoadTy->getScalarType()))
          // Be careful not to replace a load of a non-integral pointer value
          // with an inttoptr here.
596 return nullptr;
597 Res = ConstantExpr::getIntToPtr(C: Res, Ty: LoadTy);
598 }
599 return Res;
600 }
601 return nullptr;
602 }
603
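  // We can read at most 32 bytes, matching the size of the RawBytes buffer
  // below.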
604 unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
605 if (BytesLoaded > 32 || BytesLoaded == 0)
606 return nullptr;
607
608 // If we're not accessing anything in this constant, the result is undefined.
609 if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
610 return PoisonValue::get(T: IntType);
611
612 // TODO: We should be able to support scalable types.
613 TypeSize InitializerSize = DL.getTypeAllocSize(Ty: C->getType());
614 if (InitializerSize.isScalable())
615 return nullptr;
616
617 // If we're not accessing anything in this constant, the result is undefined.
618 if (Offset >= (int64_t)InitializerSize.getFixedValue())
619 return PoisonValue::get(T: IntType);
620
621 unsigned char RawBytes[32] = {0};
622 unsigned char *CurPtr = RawBytes;
623 unsigned BytesLeft = BytesLoaded;
624
625 // If we're loading off the beginning of the global, some bytes may be valid.
626 if (Offset < 0) {
627 CurPtr += -Offset;
628 BytesLeft += Offset;
629 Offset = 0;
630 }
631
632 if (!ReadDataFromGlobal(C, ByteOffset: Offset, CurPtr, BytesLeft, DL))
633 return nullptr;
634
635 APInt ResultVal = APInt(IntType->getBitWidth(), 0);
636 if (DL.isLittleEndian()) {
637 ResultVal = RawBytes[BytesLoaded - 1];
638 for (unsigned i = 1; i != BytesLoaded; ++i) {
639 ResultVal <<= 8;
640 ResultVal |= RawBytes[BytesLoaded - 1 - i];
641 }
642 } else {
643 ResultVal = RawBytes[0];
644 for (unsigned i = 1; i != BytesLoaded; ++i) {
645 ResultVal <<= 8;
646 ResultVal |= RawBytes[i];
647 }
648 }
649
650 return ConstantInt::get(Context&: IntType->getContext(), V: ResultVal);
651}
652
653} // anonymous namespace
654
// If GV is a constant with an initializer, read its representation starting
656// at Offset and return it as a constant array of unsigned char. Otherwise
657// return null.
658Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,
659 uint64_t Offset) {
660 if (!GV->isConstant() || !GV->hasDefinitiveInitializer())
661 return nullptr;
662
663 const DataLayout &DL = GV->getDataLayout();
664 Constant *Init = const_cast<Constant *>(GV->getInitializer());
665 TypeSize InitSize = DL.getTypeAllocSize(Ty: Init->getType());
666 if (InitSize < Offset)
667 return nullptr;
668
669 uint64_t NBytes = InitSize - Offset;
670 if (NBytes > UINT16_MAX)
671 // Bail for large initializers in excess of 64K to avoid allocating
672 // too much memory.
    // Offset is assumed to be less than or equal to InitSize (this
674 // is enforced in ReadDataFromGlobal).
675 return nullptr;
676
677 SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));
678 unsigned char *CurPtr = RawBytes.data();
679
680 if (!ReadDataFromGlobal(C: Init, ByteOffset: Offset, CurPtr, BytesLeft: NBytes, DL))
681 return nullptr;
682
683 return ConstantDataArray::get(Context&: GV->getContext(), Elts&: RawBytes);
684}
685
686/// If this Offset points exactly to the start of an aggregate element, return
687/// that element, otherwise return nullptr.
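/// For example, with the usual layout of { i32, i32 }, an Offset of 4 returns
/// the second field, while an Offset of 2 does not start any element and so
/// returns nullptr.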
688Constant *getConstantAtOffset(Constant *Base, APInt Offset,
689 const DataLayout &DL) {
690 if (Offset.isZero())
691 return Base;
692
693 if (!isa<ConstantAggregate>(Val: Base) && !isa<ConstantDataSequential>(Val: Base))
694 return nullptr;
695
696 Type *ElemTy = Base->getType();
697 SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
698 if (!Offset.isZero() || !Indices[0].isZero())
699 return nullptr;
700
701 Constant *C = Base;
702 for (const APInt &Index : drop_begin(RangeOrContainer&: Indices)) {
703 if (Index.isNegative() || Index.getActiveBits() >= 32)
704 return nullptr;
705
706 C = C->getAggregateElement(Elt: Index.getZExtValue());
707 if (!C)
708 return nullptr;
709 }
710
711 return C;
712}
713
714Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
715 const APInt &Offset,
716 const DataLayout &DL) {
717 if (Constant *AtOffset = getConstantAtOffset(Base: C, Offset, DL))
718 if (Constant *Result = ConstantFoldLoadThroughBitcast(C: AtOffset, DestTy: Ty, DL))
719 return Result;
720
721 // Explicitly check for out-of-bounds access, so we return poison even if the
722 // constant is a uniform value.
723 TypeSize Size = DL.getTypeAllocSize(Ty: C->getType());
724 if (!Size.isScalable() && Offset.sge(RHS: Size.getFixedValue()))
725 return PoisonValue::get(T: Ty);
726
727 // Try an offset-independent fold of a uniform value.
728 if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty, DL))
729 return Result;
730
731 // Try hard to fold loads from bitcasted strange and non-type-safe things.
732 if (Offset.getSignificantBits() <= 64)
733 if (Constant *Result =
734 FoldReinterpretLoadFromConst(C, LoadTy: Ty, Offset: Offset.getSExtValue(), DL))
735 return Result;
736
737 return nullptr;
738}
739
740Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
741 const DataLayout &DL) {
742 return ConstantFoldLoadFromConst(C, Ty, Offset: APInt(64, 0), DL);
743}
744
745Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
746 APInt Offset,
747 const DataLayout &DL) {
748 // We can only fold loads from constant globals with a definitive initializer.
749 // Check this upfront, to skip expensive offset calculations.
750 auto *GV = dyn_cast<GlobalVariable>(Val: getUnderlyingObject(V: C));
751 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
752 return nullptr;
753
754 C = cast<Constant>(Val: C->stripAndAccumulateConstantOffsets(
755 DL, Offset, /* AllowNonInbounds */ true));
756
757 if (C == GV)
758 if (Constant *Result = ConstantFoldLoadFromConst(C: GV->getInitializer(), Ty,
759 Offset, DL))
760 return Result;
761
762 // If this load comes from anywhere in a uniform constant global, the value
763 // is always the same, regardless of the loaded offset.
764 return ConstantFoldLoadFromUniformValue(C: GV->getInitializer(), Ty, DL);
765}
766
767Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
768 const DataLayout &DL) {
769 APInt Offset(DL.getIndexTypeSizeInBits(Ty: C->getType()), 0);
770 return ConstantFoldLoadFromConstPtr(C, Ty, Offset: std::move(Offset), DL);
771}
772
773Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty,
774 const DataLayout &DL) {
775 if (isa<PoisonValue>(Val: C))
776 return PoisonValue::get(T: Ty);
777 if (isa<UndefValue>(Val: C))
778 return UndefValue::get(T: Ty);
  // If padding is needed when storing C to memory, then it isn't considered
  // uniform.
781 if (!DL.typeSizeEqualsStoreSize(Ty: C->getType()))
782 return nullptr;
783 if (C->isNullValue() && !Ty->isX86_AMXTy())
784 return Constant::getNullValue(Ty);
785 if (C->isAllOnesValue() &&
786 (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
787 return Constant::getAllOnesValue(Ty);
788 return nullptr;
789}
790
791namespace {
792
793/// One of Op0/Op1 is a constant expression.
794/// Attempt to symbolically evaluate the result of a binary operator merging
/// these together, using the DataLayout information in DL.
797Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
798 const DataLayout &DL) {
799 // SROA
800
801 // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
802 // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
803 // bits.
804
805 if (Opc == Instruction::And) {
806 KnownBits Known0 = computeKnownBits(V: Op0, DL);
807 KnownBits Known1 = computeKnownBits(V: Op1, DL);
808 if ((Known1.One | Known0.Zero).isAllOnes()) {
809 // All the bits of Op0 that the 'and' could be masking are already zero.
810 return Op0;
811 }
812 if ((Known0.One | Known1.Zero).isAllOnes()) {
813 // All the bits of Op1 that the 'and' could be masking are already zero.
814 return Op1;
815 }
816
817 Known0 &= Known1;
818 if (Known0.isConstant())
819 return ConstantInt::get(Ty: Op0->getType(), V: Known0.getConstant());
820 }
821
822 // If the constant expr is something like &A[123] - &A[4].f, fold this into a
823 // constant. This happens frequently when iterating over a global array.
824 if (Opc == Instruction::Sub) {
825 GlobalValue *GV1, *GV2;
826 APInt Offs1, Offs2;
827
828 if (IsConstantOffsetFromGlobal(C: Op0, GV&: GV1, Offset&: Offs1, DL))
829 if (IsConstantOffsetFromGlobal(C: Op1, GV&: GV2, Offset&: Offs2, DL) && GV1 == GV2) {
830 unsigned OpSize = DL.getTypeSizeInBits(Ty: Op0->getType());
831
832 // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
        // PtrToInt may change the bitwidth, so we have to convert to the right
        // size first.
835 return ConstantInt::get(Ty: Op0->getType(), V: Offs1.zextOrTrunc(width: OpSize) -
836 Offs2.zextOrTrunc(width: OpSize));
837 }
838 }
839
840 return nullptr;
841}
842
843/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly cast by the getelementptr.
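/// For example, on a target with a 64-bit index type, the i16 indices in
/// "getelementptr [10 x i32], ptr @g, i16 0, i16 3" (@g being an arbitrary
/// global) would be rewritten as i64 before the GEP is folded.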
845Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
846 Type *ResultTy, GEPNoWrapFlags NW,
847 std::optional<ConstantRange> InRange,
848 const DataLayout &DL, const TargetLibraryInfo *TLI) {
849 Type *IntIdxTy = DL.getIndexType(PtrTy: ResultTy);
850 Type *IntIdxScalarTy = IntIdxTy->getScalarType();
851
852 bool Any = false;
853 SmallVector<Constant*, 32> NewIdxs;
854 for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
855 if ((i == 1 ||
856 !isa<StructType>(Val: GetElementPtrInst::getIndexedType(
857 Ty: SrcElemTy, IdxList: Ops.slice(N: 1, M: i - 1)))) &&
858 Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
859 Any = true;
860 Type *NewType =
861 Ops[i]->getType()->isVectorTy() ? IntIdxTy : IntIdxScalarTy;
862 Constant *NewIdx = ConstantFoldCastOperand(
863 Opcode: CastInst::getCastOpcode(Val: Ops[i], SrcIsSigned: true, Ty: NewType, DstIsSigned: true), C: Ops[i], DestTy: NewType,
864 DL);
865 if (!NewIdx)
866 return nullptr;
867 NewIdxs.push_back(Elt: NewIdx);
868 } else
869 NewIdxs.push_back(Elt: Ops[i]);
870 }
871
872 if (!Any)
873 return nullptr;
874
875 Constant *C =
876 ConstantExpr::getGetElementPtr(Ty: SrcElemTy, C: Ops[0], IdxList: NewIdxs, NW, InRange);
877 return ConstantFoldConstant(C, DL, TLI);
878}
879
880/// If we can symbolically evaluate the GEP constant expression, do so.
881Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
882 ArrayRef<Constant *> Ops,
883 const DataLayout &DL,
884 const TargetLibraryInfo *TLI) {
885 Type *SrcElemTy = GEP->getSourceElementType();
886 Type *ResTy = GEP->getType();
887 if (!SrcElemTy->isSized() || isa<ScalableVectorType>(Val: SrcElemTy))
888 return nullptr;
889
890 if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResultTy: ResTy, NW: GEP->getNoWrapFlags(),
891 InRange: GEP->getInRange(), DL, TLI))
892 return C;
893
894 Constant *Ptr = Ops[0];
895 if (!Ptr->getType()->isPointerTy())
896 return nullptr;
897
898 Type *IntIdxTy = DL.getIndexType(PtrTy: Ptr->getType());
899
900 for (unsigned i = 1, e = Ops.size(); i != e; ++i)
901 if (!isa<ConstantInt>(Val: Ops[i]) || !Ops[i]->getType()->isIntegerTy())
902 return nullptr;
903
904 unsigned BitWidth = DL.getTypeSizeInBits(Ty: IntIdxTy);
905 APInt Offset = APInt(
906 BitWidth,
907 DL.getIndexedOffsetInType(
908 ElemTy: SrcElemTy, Indices: ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)),
909 /*isSigned=*/true, /*implicitTrunc=*/true);
910
911 std::optional<ConstantRange> InRange = GEP->getInRange();
912 if (InRange)
913 InRange = InRange->sextOrTrunc(BitWidth);
914
915 // If this is a GEP of a GEP, fold it all into a single GEP.
916 GEPNoWrapFlags NW = GEP->getNoWrapFlags();
917 bool Overflow = false;
918 while (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr)) {
919 NW &= GEP->getNoWrapFlags();
920
921 SmallVector<Value *, 4> NestedOps(llvm::drop_begin(RangeOrContainer: GEP->operands()));
922
    // Do not try to incorporate the sub-GEP if some index is not a number.
924 bool AllConstantInt = true;
925 for (Value *NestedOp : NestedOps)
926 if (!isa<ConstantInt>(Val: NestedOp)) {
927 AllConstantInt = false;
928 break;
929 }
930 if (!AllConstantInt)
931 break;
932
    // Adjust the inrange offset and intersect the inrange attributes.
934 if (auto GEPRange = GEP->getInRange()) {
935 auto AdjustedGEPRange = GEPRange->sextOrTrunc(BitWidth).subtract(CI: Offset);
936 InRange =
937 InRange ? InRange->intersectWith(CR: AdjustedGEPRange) : AdjustedGEPRange;
938 }
939
940 Ptr = cast<Constant>(Val: GEP->getOperand(i_nocapture: 0));
941 SrcElemTy = GEP->getSourceElementType();
942 Offset = Offset.sadd_ov(
943 RHS: APInt(BitWidth, DL.getIndexedOffsetInType(ElemTy: SrcElemTy, Indices: NestedOps),
944 /*isSigned=*/true, /*implicitTrunc=*/true),
945 Overflow);
946 }
947
948 // Preserving nusw (without inbounds) also requires that the offset
949 // additions did not overflow.
950 if (NW.hasNoUnsignedSignedWrap() && !NW.isInBounds() && Overflow)
951 NW = NW.withoutNoUnsignedSignedWrap();
952
953 // If the base value for this address is a literal integer value, fold the
  // getelementptr to the resulting integer value cast to the pointer type.
955 APInt BaseIntVal(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
956 if (auto *CE = dyn_cast<ConstantExpr>(Val: Ptr)) {
957 if (CE->getOpcode() == Instruction::IntToPtr) {
958 if (auto *Base = dyn_cast<ConstantInt>(Val: CE->getOperand(i_nocapture: 0)))
959 BaseIntVal = Base->getValue().zextOrTrunc(width: BaseIntVal.getBitWidth());
960 }
961 }
962
963 if ((Ptr->isNullValue() || BaseIntVal != 0) &&
964 !DL.mustNotIntroduceIntToPtr(Ty: Ptr->getType())) {
965
966 // If the index size is smaller than the pointer size, add to the low
967 // bits only.
968 BaseIntVal.insertBits(SubBits: BaseIntVal.trunc(width: BitWidth) + Offset, bitPosition: 0);
969 Constant *C = ConstantInt::get(Context&: Ptr->getContext(), V: BaseIntVal);
970 return ConstantExpr::getIntToPtr(C, Ty: ResTy);
971 }
972
973 // Try to infer inbounds for GEPs of globals.
974 if (!NW.isInBounds() && Offset.isNonNegative()) {
975 bool CanBeNull, CanBeFreed;
976 uint64_t DerefBytes =
977 Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
978 if (DerefBytes != 0 && !CanBeNull && Offset.sle(RHS: DerefBytes))
979 NW |= GEPNoWrapFlags::inBounds();
980 }
981
982 // nusw + nneg -> nuw
983 if (NW.hasNoUnsignedSignedWrap() && Offset.isNonNegative())
984 NW |= GEPNoWrapFlags::noUnsignedWrap();
985
986 // Otherwise canonicalize this to a single ptradd.
987 LLVMContext &Ctx = Ptr->getContext();
988 return ConstantExpr::getGetElementPtr(Ty: Type::getInt8Ty(C&: Ctx), C: Ptr,
989 Idx: ConstantInt::get(Context&: Ctx, V: Offset), NW,
990 InRange);
991}
992
993/// Attempt to constant fold an instruction with the
994/// specified opcode and operands. If successful, the constant result is
/// returned; if not, null is returned. Note that this function can fail when
996/// attempting to fold instructions like loads and stores, which have no
997/// constant expression form.
998Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
999 ArrayRef<Constant *> Ops,
1000 const DataLayout &DL,
1001 const TargetLibraryInfo *TLI,
1002 bool AllowNonDeterministic) {
1003 Type *DestTy = InstOrCE->getType();
1004
1005 if (Instruction::isUnaryOp(Opcode))
1006 return ConstantFoldUnaryOpOperand(Opcode, Op: Ops[0], DL);
1007
1008 if (Instruction::isBinaryOp(Opcode)) {
1009 switch (Opcode) {
1010 default:
1011 break;
1012 case Instruction::FAdd:
1013 case Instruction::FSub:
1014 case Instruction::FMul:
1015 case Instruction::FDiv:
1016 case Instruction::FRem:
      // Handle floating-point instructions separately to account for denormals.
      // TODO: If a constant expression is being folded rather than an
      // instruction, denormals will not be flushed/treated as zero.
1020 if (const auto *I = dyn_cast<Instruction>(Val: InstOrCE)) {
1021 return ConstantFoldFPInstOperands(Opcode, LHS: Ops[0], RHS: Ops[1], DL, I,
1022 AllowNonDeterministic);
1023 }
1024 }
1025 return ConstantFoldBinaryOpOperands(Opcode, LHS: Ops[0], RHS: Ops[1], DL);
1026 }
1027
1028 if (Instruction::isCast(Opcode))
1029 return ConstantFoldCastOperand(Opcode, C: Ops[0], DestTy, DL);
1030
1031 if (auto *GEP = dyn_cast<GEPOperator>(Val: InstOrCE)) {
1032 Type *SrcElemTy = GEP->getSourceElementType();
1033 if (!ConstantExpr::isSupportedGetElementPtr(SrcElemTy))
1034 return nullptr;
1035
1036 if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
1037 return C;
1038
1039 return ConstantExpr::getGetElementPtr(Ty: SrcElemTy, C: Ops[0], IdxList: Ops.slice(N: 1),
1040 NW: GEP->getNoWrapFlags(),
1041 InRange: GEP->getInRange());
1042 }
1043
1044 if (auto *CE = dyn_cast<ConstantExpr>(Val: InstOrCE))
1045 return CE->getWithOperands(Ops);
1046
1047 switch (Opcode) {
1048 default: return nullptr;
1049 case Instruction::ICmp:
1050 case Instruction::FCmp: {
1051 auto *C = cast<CmpInst>(Val: InstOrCE);
1052 return ConstantFoldCompareInstOperands(Predicate: C->getPredicate(), LHS: Ops[0], RHS: Ops[1],
1053 DL, TLI, I: C);
1054 }
1055 case Instruction::Freeze:
1056 return isGuaranteedNotToBeUndefOrPoison(V: Ops[0]) ? Ops[0] : nullptr;
1057 case Instruction::Call:
1058 if (auto *F = dyn_cast<Function>(Val: Ops.back())) {
1059 const auto *Call = cast<CallBase>(Val: InstOrCE);
1060 if (canConstantFoldCallTo(Call, F))
1061 return ConstantFoldCall(Call, F, Operands: Ops.slice(N: 0, M: Ops.size() - 1), TLI,
1062 AllowNonDeterministic);
1063 }
1064 return nullptr;
1065 case Instruction::Select:
1066 return ConstantFoldSelectInstruction(Cond: Ops[0], V1: Ops[1], V2: Ops[2]);
1067 case Instruction::ExtractElement:
1068 return ConstantExpr::getExtractElement(Vec: Ops[0], Idx: Ops[1]);
1069 case Instruction::ExtractValue:
1070 return ConstantFoldExtractValueInstruction(
1071 Agg: Ops[0], Idxs: cast<ExtractValueInst>(Val: InstOrCE)->getIndices());
1072 case Instruction::InsertElement:
1073 return ConstantExpr::getInsertElement(Vec: Ops[0], Elt: Ops[1], Idx: Ops[2]);
1074 case Instruction::InsertValue:
1075 return ConstantFoldInsertValueInstruction(
1076 Agg: Ops[0], Val: Ops[1], Idxs: cast<InsertValueInst>(Val: InstOrCE)->getIndices());
1077 case Instruction::ShuffleVector:
1078 return ConstantExpr::getShuffleVector(
1079 V1: Ops[0], V2: Ops[1], Mask: cast<ShuffleVectorInst>(Val: InstOrCE)->getShuffleMask());
1080 case Instruction::Load: {
1081 const auto *LI = dyn_cast<LoadInst>(Val: InstOrCE);
1082 if (LI->isVolatile())
1083 return nullptr;
1084 return ConstantFoldLoadFromConstPtr(C: Ops[0], Ty: LI->getType(), DL);
1085 }
1086 }
1087}
1088
1089} // end anonymous namespace
1090
1091//===----------------------------------------------------------------------===//
1092// Constant Folding public APIs
1093//===----------------------------------------------------------------------===//
1094
1095namespace {
1096
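/// Fold the constant C, recursively constant folding the operands of any
/// contained ConstantExpr or ConstantVector. FoldedOps caches operands that
/// have already been folded, so shared subexpressions are processed only once.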
1097Constant *
1098ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
1099 const TargetLibraryInfo *TLI,
1100 SmallDenseMap<Constant *, Constant *> &FoldedOps) {
1101 if (!isa<ConstantVector>(Val: C) && !isa<ConstantExpr>(Val: C))
1102 return const_cast<Constant *>(C);
1103
1104 SmallVector<Constant *, 8> Ops;
1105 for (const Use &OldU : C->operands()) {
1106 Constant *OldC = cast<Constant>(Val: &OldU);
1107 Constant *NewC = OldC;
1108 // Recursively fold the ConstantExpr's operands. If we have already folded
1109 // a ConstantExpr, we don't have to process it again.
1110 if (isa<ConstantVector>(Val: OldC) || isa<ConstantExpr>(Val: OldC)) {
1111 auto It = FoldedOps.find(Val: OldC);
1112 if (It == FoldedOps.end()) {
1113 NewC = ConstantFoldConstantImpl(C: OldC, DL, TLI, FoldedOps);
1114 FoldedOps.insert(KV: {OldC, NewC});
1115 } else {
1116 NewC = It->second;
1117 }
1118 }
1119 Ops.push_back(Elt: NewC);
1120 }
1121
1122 if (auto *CE = dyn_cast<ConstantExpr>(Val: C)) {
1123 if (Constant *Res = ConstantFoldInstOperandsImpl(
1124 InstOrCE: CE, Opcode: CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true))
1125 return Res;
1126 return const_cast<Constant *>(C);
1127 }
1128
1129 assert(isa<ConstantVector>(C));
1130 return ConstantVector::get(V: Ops);
1131}
1132
1133} // end anonymous namespace
1134
1135Constant *llvm::ConstantFoldInstruction(const Instruction *I,
1136 const DataLayout &DL,
1137 const TargetLibraryInfo *TLI) {
1138 // Handle PHI nodes quickly here...
1139 if (auto *PN = dyn_cast<PHINode>(Val: I)) {
1140 Constant *CommonValue = nullptr;
1141
1142 SmallDenseMap<Constant *, Constant *> FoldedOps;
1143 for (Value *Incoming : PN->incoming_values()) {
      // If the incoming value is undef, then skip it. Note that while we could
      // skip the value if it is equal to the phi node itself, we choose not to
      // because that would break the rule that constant folding only applies if
      // all operands are constants.
1148 if (isa<UndefValue>(Val: Incoming))
1149 continue;
1150 // If the incoming value is not a constant, then give up.
1151 auto *C = dyn_cast<Constant>(Val: Incoming);
1152 if (!C)
1153 return nullptr;
1154 // Fold the PHI's operands.
1155 C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
1156 // If the incoming value is a different constant to
1157 // the one we saw previously, then give up.
1158 if (CommonValue && C != CommonValue)
1159 return nullptr;
1160 CommonValue = C;
1161 }
1162
1163 // If we reach here, all incoming values are the same constant or undef.
1164 return CommonValue ? CommonValue : UndefValue::get(T: PN->getType());
1165 }
1166
1167 // Scan the operand list, checking to see if they are all constants, if so,
1168 // hand off to ConstantFoldInstOperandsImpl.
1169 if (!all_of(Range: I->operands(), P: [](const Use &U) { return isa<Constant>(Val: U); }))
1170 return nullptr;
1171
1172 SmallDenseMap<Constant *, Constant *> FoldedOps;
1173 SmallVector<Constant *, 8> Ops;
1174 for (const Use &OpU : I->operands()) {
1175 auto *Op = cast<Constant>(Val: &OpU);
1176 // Fold the Instruction's operands.
1177 Op = ConstantFoldConstantImpl(C: Op, DL, TLI, FoldedOps);
1178 Ops.push_back(Elt: Op);
1179 }
1180
1181 return ConstantFoldInstOperands(I, Ops, DL, TLI);
1182}
1183
1184Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
1185 const TargetLibraryInfo *TLI) {
1186 SmallDenseMap<Constant *, Constant *> FoldedOps;
1187 return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
1188}
1189
1190Constant *llvm::ConstantFoldInstOperands(const Instruction *I,
1191 ArrayRef<Constant *> Ops,
1192 const DataLayout &DL,
1193 const TargetLibraryInfo *TLI,
1194 bool AllowNonDeterministic) {
1195 return ConstantFoldInstOperandsImpl(InstOrCE: I, Opcode: I->getOpcode(), Ops, DL, TLI,
1196 AllowNonDeterministic);
1197}
1198
1199Constant *llvm::ConstantFoldCompareInstOperands(
1200 unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,
1201 const TargetLibraryInfo *TLI, const Instruction *I) {
1202 CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
1203 // fold: icmp (inttoptr x), null -> icmp x, 0
1204 // fold: icmp null, (inttoptr x) -> icmp 0, x
1205 // fold: icmp (ptrtoint x), 0 -> icmp x, null
1206 // fold: icmp 0, (ptrtoint x) -> icmp null, x
1207 // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
1208 // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
1209 //
  // FIXME: The following comment is out of date and the DataLayout is here now.
1211 // ConstantExpr::getCompare cannot do this, because it doesn't have DL
1212 // around to know if bit truncation is happening.
1213 if (auto *CE0 = dyn_cast<ConstantExpr>(Val: Ops0)) {
1214 if (Ops1->isNullValue()) {
1215 if (CE0->getOpcode() == Instruction::IntToPtr) {
1216 Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
1217 // Convert the integer value to the right size to ensure we get the
1218 // proper extension or truncation.
1219 if (Constant *C = ConstantFoldIntegerCast(C: CE0->getOperand(i_nocapture: 0), DestTy: IntPtrTy,
1220 /*IsSigned*/ false, DL)) {
1221 Constant *Null = Constant::getNullValue(Ty: C->getType());
1222 return ConstantFoldCompareInstOperands(IntPredicate: Predicate, Ops0: C, Ops1: Null, DL, TLI);
1223 }
1224 }
1225
1226 // icmp only compares the address part of the pointer, so only do this
1227 // transform if the integer size matches the address size.
1228 if (CE0->getOpcode() == Instruction::PtrToInt ||
1229 CE0->getOpcode() == Instruction::PtrToAddr) {
1230 Type *AddrTy = DL.getAddressType(PtrTy: CE0->getOperand(i_nocapture: 0)->getType());
1231 if (CE0->getType() == AddrTy) {
1232 Constant *C = CE0->getOperand(i_nocapture: 0);
1233 Constant *Null = Constant::getNullValue(Ty: C->getType());
1234 return ConstantFoldCompareInstOperands(IntPredicate: Predicate, Ops0: C, Ops1: Null, DL, TLI);
1235 }
1236 }
1237 }
1238
1239 if (auto *CE1 = dyn_cast<ConstantExpr>(Val: Ops1)) {
1240 if (CE0->getOpcode() == CE1->getOpcode()) {
1241 if (CE0->getOpcode() == Instruction::IntToPtr) {
1242 Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
1243
1244 // Convert the integer value to the right size to ensure we get the
1245 // proper extension or truncation.
1246 Constant *C0 = ConstantFoldIntegerCast(C: CE0->getOperand(i_nocapture: 0), DestTy: IntPtrTy,
1247 /*IsSigned*/ false, DL);
1248 Constant *C1 = ConstantFoldIntegerCast(C: CE1->getOperand(i_nocapture: 0), DestTy: IntPtrTy,
1249 /*IsSigned*/ false, DL);
1250 if (C0 && C1)
1251 return ConstantFoldCompareInstOperands(IntPredicate: Predicate, Ops0: C0, Ops1: C1, DL, TLI);
1252 }
1253
1254 // icmp only compares the address part of the pointer, so only do this
1255 // transform if the integer size matches the address size.
1256 if (CE0->getOpcode() == Instruction::PtrToInt ||
1257 CE0->getOpcode() == Instruction::PtrToAddr) {
1258 Type *AddrTy = DL.getAddressType(PtrTy: CE0->getOperand(i_nocapture: 0)->getType());
1259 if (CE0->getType() == AddrTy &&
1260 CE0->getOperand(i_nocapture: 0)->getType() == CE1->getOperand(i_nocapture: 0)->getType()) {
1261 return ConstantFoldCompareInstOperands(
1262 IntPredicate: Predicate, Ops0: CE0->getOperand(i_nocapture: 0), Ops1: CE1->getOperand(i_nocapture: 0), DL, TLI);
1263 }
1264 }
1265 }
1266 }
1267
1268 // Convert pointer comparison (base+offset1) pred (base+offset2) into
1269 // offset1 pred offset2, for the case where the offset is inbounds. This
1270 // only works for equality and unsigned comparison, as inbounds permits
1271 // crossing the sign boundary. However, the offset comparison itself is
1272 // signed.
1273 if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(predicate: Predicate)) {
1274 unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ty: Ops0->getType());
1275 APInt Offset0(IndexWidth, 0);
1276 bool IsEqPred = ICmpInst::isEquality(P: Predicate);
1277 Value *Stripped0 = Ops0->stripAndAccumulateConstantOffsets(
1278 DL, Offset&: Offset0, /*AllowNonInbounds=*/IsEqPred,
1279 /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
1280 /*LookThroughIntToPtr=*/IsEqPred);
1281 APInt Offset1(IndexWidth, 0);
1282 Value *Stripped1 = Ops1->stripAndAccumulateConstantOffsets(
1283 DL, Offset&: Offset1, /*AllowNonInbounds=*/IsEqPred,
1284 /*AllowInvariantGroup=*/false, /*ExternalAnalysis=*/nullptr,
1285 /*LookThroughIntToPtr=*/IsEqPred);
1286 if (Stripped0 == Stripped1)
1287 return ConstantInt::getBool(
1288 Context&: Ops0->getContext(),
1289 V: ICmpInst::compare(LHS: Offset0, RHS: Offset1,
1290 Pred: ICmpInst::getSignedPredicate(Pred: Predicate)));
1291 }
1292 } else if (isa<ConstantExpr>(Val: Ops1)) {
1293 // If RHS is a constant expression, but the left side isn't, swap the
1294 // operands and try again.
1295 Predicate = ICmpInst::getSwappedPredicate(pred: Predicate);
1296 return ConstantFoldCompareInstOperands(IntPredicate: Predicate, Ops0: Ops1, Ops1: Ops0, DL, TLI);
1297 }
1298
1299 if (CmpInst::isFPPredicate(P: Predicate)) {
1300 // Flush any denormal constant float input according to denormal handling
1301 // mode.
1302 Ops0 = FlushFPConstant(Operand: Ops0, I, /*IsOutput=*/false);
1303 if (!Ops0)
1304 return nullptr;
1305 Ops1 = FlushFPConstant(Operand: Ops1, I, /*IsOutput=*/false);
1306 if (!Ops1)
1307 return nullptr;
1308 }
1309
1310 return ConstantFoldCompareInstruction(Predicate, C1: Ops0, C2: Ops1);
1311}
1312
1313Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
1314 const DataLayout &DL) {
1315 assert(Instruction::isUnaryOp(Opcode));
1316
1317 return ConstantFoldUnaryInstruction(Opcode, V: Op);
1318}
1319
1320Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
1321 Constant *RHS,
1322 const DataLayout &DL) {
1323 assert(Instruction::isBinaryOp(Opcode));
1324 if (isa<ConstantExpr>(Val: LHS) || isa<ConstantExpr>(Val: RHS))
1325 if (Constant *C = SymbolicallyEvaluateBinop(Opc: Opcode, Op0: LHS, Op1: RHS, DL))
1326 return C;
1327
1328 if (ConstantExpr::isDesirableBinOp(Opcode))
1329 return ConstantExpr::get(Opcode, C1: LHS, C2: RHS);
1330 return ConstantFoldBinaryInstruction(Opcode, V1: LHS, V2: RHS);
1331}
1332
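/// Return the constant that APF folds to under the given denormal mode, or
/// nullptr if the mode is dynamic and the result is therefore unknown.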
1333static ConstantFP *flushDenormalConstant(Type *Ty, const APFloat &APF,
1334 DenormalMode::DenormalModeKind Mode) {
1335 switch (Mode) {
1336 case DenormalMode::Dynamic:
1337 return nullptr;
1338 case DenormalMode::IEEE:
1339 return ConstantFP::get(Context&: Ty->getContext(), V: APF);
1340 case DenormalMode::PreserveSign:
1341 return ConstantFP::get(
1342 Context&: Ty->getContext(),
1343 V: APFloat::getZero(Sem: APF.getSemantics(), Negative: APF.isNegative()));
1344 case DenormalMode::PositiveZero:
1345 return ConstantFP::get(Context&: Ty->getContext(),
1346 V: APFloat::getZero(Sem: APF.getSemantics(), Negative: false));
1347 default:
1348 break;
1349 }
1350
1351 llvm_unreachable("unknown denormal mode");
1352}
1353
1354/// Return the denormal mode that can be assumed when executing a floating point
1355/// operation at \p CtxI.
1356static DenormalMode getInstrDenormalMode(const Instruction *CtxI, Type *Ty) {
1357 if (!CtxI || !CtxI->getParent() || !CtxI->getFunction())
1358 return DenormalMode::getDynamic();
1359 return CtxI->getFunction()->getDenormalMode(FPType: Ty->getFltSemantics());
1360}
1361
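/// Flush a single denormal ConstantFP, using the input or output (per
/// IsOutput) denormal mode of the function containing Inst.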
1362static ConstantFP *flushDenormalConstantFP(ConstantFP *CFP,
1363 const Instruction *Inst,
1364 bool IsOutput) {
1365 const APFloat &APF = CFP->getValueAPF();
1366 if (!APF.isDenormal())
1367 return CFP;
1368
1369 DenormalMode Mode = getInstrDenormalMode(CtxI: Inst, Ty: CFP->getType());
1370 return flushDenormalConstant(Ty: CFP->getType(), APF,
1371 Mode: IsOutput ? Mode.Output : Mode.Input);
1372}
1373
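// Handles scalar FP constants as well as splat and per-element vector
// constants; returns nullptr when the flushed value cannot be determined.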
1374Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *Inst,
1375 bool IsOutput) {
1376 if (ConstantFP *CFP = dyn_cast<ConstantFP>(Val: Operand))
1377 return flushDenormalConstantFP(CFP, Inst, IsOutput);
1378
1379 if (isa<ConstantAggregateZero, UndefValue>(Val: Operand))
1380 return Operand;
1381
1382 Type *Ty = Operand->getType();
1383 VectorType *VecTy = dyn_cast<VectorType>(Val: Ty);
1384 if (VecTy) {
1385 if (auto *Splat = dyn_cast_or_null<ConstantFP>(Val: Operand->getSplatValue())) {
1386 ConstantFP *Folded = flushDenormalConstantFP(CFP: Splat, Inst, IsOutput);
1387 if (!Folded)
1388 return nullptr;
1389 return ConstantVector::getSplat(EC: VecTy->getElementCount(), Elt: Folded);
1390 }
1391
1392 Ty = VecTy->getElementType();
1393 }
1394
1395 if (isa<ConstantExpr>(Val: Operand))
1396 return Operand;
1397
1398 if (const auto *CV = dyn_cast<ConstantVector>(Val: Operand)) {
1399 SmallVector<Constant *, 16> NewElts;
1400 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1401 Constant *Element = CV->getAggregateElement(Elt: i);
1402 if (isa<UndefValue>(Val: Element)) {
1403 NewElts.push_back(Elt: Element);
1404 continue;
1405 }
1406
1407 ConstantFP *CFP = dyn_cast<ConstantFP>(Val: Element);
1408 if (!CFP)
1409 return nullptr;
1410
1411 ConstantFP *Folded = flushDenormalConstantFP(CFP, Inst, IsOutput);
1412 if (!Folded)
1413 return nullptr;
1414 NewElts.push_back(Elt: Folded);
1415 }
1416
1417 return ConstantVector::get(V: NewElts);
1418 }
1419
1420 if (const auto *CDV = dyn_cast<ConstantDataVector>(Val: Operand)) {
1421 SmallVector<Constant *, 16> NewElts;
1422 for (unsigned I = 0, E = CDV->getNumElements(); I < E; ++I) {
1423 const APFloat &Elt = CDV->getElementAsAPFloat(i: I);
1424 if (!Elt.isDenormal()) {
1425 NewElts.push_back(Elt: ConstantFP::get(Ty, V: Elt));
1426 } else {
1427 DenormalMode Mode = getInstrDenormalMode(CtxI: Inst, Ty);
1428 ConstantFP *Folded =
1429 flushDenormalConstant(Ty, APF: Elt, Mode: IsOutput ? Mode.Output : Mode.Input);
1430 if (!Folded)
1431 return nullptr;
1432 NewElts.push_back(Elt: Folded);
1433 }
1434 }
1435
1436 return ConstantVector::get(V: NewElts);
1437 }
1438
1439 return nullptr;
1440}
1441
1442Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
1443 Constant *RHS, const DataLayout &DL,
1444 const Instruction *I,
1445 bool AllowNonDeterministic) {
1446 if (Instruction::isBinaryOp(Opcode)) {
1447 // Flush denormal inputs if needed.
1448 Constant *Op0 = FlushFPConstant(Operand: LHS, Inst: I, /* IsOutput */ false);
1449 if (!Op0)
1450 return nullptr;
1451 Constant *Op1 = FlushFPConstant(Operand: RHS, Inst: I, /* IsOutput */ false);
1452 if (!Op1)
1453 return nullptr;
1454
1455 // If nsz or an algebraic FMF flag is set, the result of the FP operation
1456 // may change due to future optimization. Don't constant fold them if
1457 // non-deterministic results are not allowed.
1458 if (!AllowNonDeterministic)
1459 if (auto *FP = dyn_cast_or_null<FPMathOperator>(Val: I))
1460 if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() ||
1461 FP->hasAllowContract() || FP->hasAllowReciprocal())
1462 return nullptr;
1463
1464 // Calculate constant result.
1465 Constant *C = ConstantFoldBinaryOpOperands(Opcode, LHS: Op0, RHS: Op1, DL);
1466 if (!C)
1467 return nullptr;
1468
1469 // Flush denormal output if needed.
1470 C = FlushFPConstant(Operand: C, Inst: I, /* IsOutput */ true);
1471 if (!C)
1472 return nullptr;
1473
1474 // The precise NaN value is non-deterministic.
1475 if (!AllowNonDeterministic && C->isNaN())
1476 return nullptr;
1477
1478 return C;
1479 }
1480 // If instruction lacks a parent/function and the denormal mode cannot be
1481 // determined, use the default (IEEE).
1482 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
1483}
1484
1485Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
1486 Type *DestTy, const DataLayout &DL) {
1487 assert(Instruction::isCast(Opcode));
1488
1489 if (auto *CE = dyn_cast<ConstantExpr>(Val: C))
1490 if (CE->isCast())
1491 if (unsigned NewOp = CastInst::isEliminableCastPair(
1492 firstOpcode: Instruction::CastOps(CE->getOpcode()),
1493 secondOpcode: Instruction::CastOps(Opcode), SrcTy: CE->getOperand(i_nocapture: 0)->getType(),
1494 MidTy: C->getType(), DstTy: DestTy, DL: &DL))
1495 return ConstantFoldCastOperand(Opcode: NewOp, C: CE->getOperand(i_nocapture: 0), DestTy, DL);
1496
1497 switch (Opcode) {
1498 default:
1499 llvm_unreachable("Missing case");
1500 case Instruction::PtrToAddr:
1501 case Instruction::PtrToInt:
1502 if (auto *CE = dyn_cast<ConstantExpr>(Val: C)) {
1503 Constant *FoldedValue = nullptr;
1504 // If the input is an inttoptr, eliminate the pair. This requires knowing
1505 // the width of a pointer, so it can't be done in ConstantExpr::getCast.
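      // e.g. on a target with 64-bit pointers (illustrative),
      //   ptrtoint (inttoptr (i64 42 to ptr)) to i64  -->  i64 42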
1506 if (CE->getOpcode() == Instruction::IntToPtr) {
1507 // zext/trunc the inttoptr to pointer/address size.
1508 Type *MidTy = Opcode == Instruction::PtrToAddr
1509 ? DL.getAddressType(PtrTy: CE->getType())
1510 : DL.getIntPtrType(CE->getType());
1511 FoldedValue = ConstantFoldIntegerCast(C: CE->getOperand(i_nocapture: 0), DestTy: MidTy,
1512 /*IsSigned=*/false, DL);
1513 } else if (auto *GEP = dyn_cast<GEPOperator>(Val: CE)) {
1514 // If we have GEP, we can perform the following folds:
1515 // (ptrtoint/ptrtoaddr (gep null, x)) -> x
1516 // (ptrtoint/ptrtoaddr (gep (gep null, x), y)) -> x + y, etc.
1517 unsigned BitWidth = DL.getIndexTypeSizeInBits(Ty: GEP->getType());
1518 APInt BaseOffset(BitWidth, 0);
1519 auto *Base = cast<Constant>(Val: GEP->stripAndAccumulateConstantOffsets(
1520 DL, Offset&: BaseOffset, /*AllowNonInbounds=*/true));
1521 if (Base->isNullValue()) {
1522 FoldedValue = ConstantInt::get(Context&: CE->getContext(), V: BaseOffset);
1523 } else {
1524 // ptrtoint/ptrtoaddr (gep i8, Ptr, (sub 0, V))
1525 // -> sub (ptrtoint/ptrtoaddr Ptr), V
1526 if (GEP->getNumIndices() == 1 &&
1527 GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8)) {
1528 auto *Ptr = cast<Constant>(Val: GEP->getPointerOperand());
1529 auto *Sub = dyn_cast<ConstantExpr>(Val: GEP->getOperand(i_nocapture: 1));
1530 Type *IntIdxTy = DL.getIndexType(PtrTy: Ptr->getType());
1531 if (Sub && Sub->getType() == IntIdxTy &&
1532 Sub->getOpcode() == Instruction::Sub &&
1533 Sub->getOperand(i_nocapture: 0)->isNullValue())
1534 FoldedValue = ConstantExpr::getSub(
1535 C1: ConstantExpr::getCast(ops: Opcode, C: Ptr, Ty: IntIdxTy),
1536 C2: Sub->getOperand(i_nocapture: 1));
1537 }
1538 }
1539 }
1540 if (FoldedValue) {
1541 // Do a zext or trunc to get to the ptrtoint/ptrtoaddr dest size.
1542 return ConstantFoldIntegerCast(C: FoldedValue, DestTy, /*IsSigned=*/false,
1543 DL);
1544 }
1545 }
1546 break;
1547 case Instruction::IntToPtr:
1548 // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
1549 // the int size is >= the ptr size and the address spaces are the same.
1550 // This requires knowing the width of a pointer, so it can't be done in
1551 // ConstantExpr::getCast.
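    // e.g. inttoptr (ptrtoint ptr @g to i64) to ptr  -->  ptr @g, assuming
    // 64-bit pointers and matching address spaces (@g is a hypothetical
    // global used for illustration).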
1552 if (auto *CE = dyn_cast<ConstantExpr>(Val: C)) {
1553 if (CE->getOpcode() == Instruction::PtrToInt) {
1554 Constant *SrcPtr = CE->getOperand(i_nocapture: 0);
1555 unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
1556 unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
1557
1558 if (MidIntSize >= SrcPtrSize) {
1559 unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
1560 if (SrcAS == DestTy->getPointerAddressSpace())
1561 return FoldBitCast(C: CE->getOperand(i_nocapture: 0), DestTy, DL);
1562 }
1563 }
1564 }
1565 break;
1566 case Instruction::Trunc:
1567 case Instruction::ZExt:
1568 case Instruction::SExt:
1569 case Instruction::FPTrunc:
1570 case Instruction::FPExt:
1571 case Instruction::UIToFP:
1572 case Instruction::SIToFP:
1573 case Instruction::FPToUI:
1574 case Instruction::FPToSI:
1575 case Instruction::AddrSpaceCast:
1576 break;
1577 case Instruction::BitCast:
1578 return FoldBitCast(C, DestTy, DL);
1579 }
1580
1581 if (ConstantExpr::isDesirableCastOp(Opcode))
1582 return ConstantExpr::getCast(ops: Opcode, C, Ty: DestTy);
1583 return ConstantFoldCastInstruction(opcode: Opcode, V: C, DestTy);
1584}
1585
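// Illustrative example: casting i8 -1 to i32 yields i32 -1 when IsSigned is
// true and i32 255 when it is false; casting i32 300 down to i8 truncates to
// i8 44.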
1586Constant *llvm::ConstantFoldIntegerCast(Constant *C, Type *DestTy,
1587 bool IsSigned, const DataLayout &DL) {
1588 Type *SrcTy = C->getType();
1589 if (SrcTy == DestTy)
1590 return C;
1591 if (SrcTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())
1592 return ConstantFoldCastOperand(Opcode: Instruction::Trunc, C, DestTy, DL);
1593 if (IsSigned)
1594 return ConstantFoldCastOperand(Opcode: Instruction::SExt, C, DestTy, DL);
1595 return ConstantFoldCastOperand(Opcode: Instruction::ZExt, C, DestTy, DL);
1596}
1597
1598//===----------------------------------------------------------------------===//
1599// Constant Folding for Calls
1600//
1601
1602bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
1603 if (Call->isNoBuiltin())
1604 return false;
1605 if (Call->getFunctionType() != F->getFunctionType())
1606 return false;
1607
1608 // Allow FP calls (both libcalls and intrinsics) to avoid being folded.
1609 // This can be useful for GPU targets or in cross-compilation scenarios
1610 // when the exact target FP behaviour is required, and the host compiler's
1611 // behaviour may be slightly different from the device's run-time behaviour.
1612 if (DisableFPCallFolding && (F->getReturnType()->isFloatingPointTy() ||
1613 any_of(Range: F->args(), P: [](const Argument &Arg) {
1614 return Arg.getType()->isFloatingPointTy();
1615 })))
1616 return false;
1617
1618 switch (F->getIntrinsicID()) {
1619 // Operations that do not operate on floating-point numbers and do not depend
1620 // on the FP environment can be folded even in strictfp functions.
1621 case Intrinsic::bswap:
1622 case Intrinsic::ctpop:
1623 case Intrinsic::ctlz:
1624 case Intrinsic::cttz:
1625 case Intrinsic::fshl:
1626 case Intrinsic::fshr:
1627 case Intrinsic::launder_invariant_group:
1628 case Intrinsic::strip_invariant_group:
1629 case Intrinsic::masked_load:
1630 case Intrinsic::get_active_lane_mask:
1631 case Intrinsic::abs:
1632 case Intrinsic::smax:
1633 case Intrinsic::smin:
1634 case Intrinsic::umax:
1635 case Intrinsic::umin:
1636 case Intrinsic::scmp:
1637 case Intrinsic::ucmp:
1638 case Intrinsic::sadd_with_overflow:
1639 case Intrinsic::uadd_with_overflow:
1640 case Intrinsic::ssub_with_overflow:
1641 case Intrinsic::usub_with_overflow:
1642 case Intrinsic::smul_with_overflow:
1643 case Intrinsic::umul_with_overflow:
1644 case Intrinsic::sadd_sat:
1645 case Intrinsic::uadd_sat:
1646 case Intrinsic::ssub_sat:
1647 case Intrinsic::usub_sat:
1648 case Intrinsic::smul_fix:
1649 case Intrinsic::smul_fix_sat:
1650 case Intrinsic::bitreverse:
1651 case Intrinsic::is_constant:
1652 case Intrinsic::vector_reduce_add:
1653 case Intrinsic::vector_reduce_mul:
1654 case Intrinsic::vector_reduce_and:
1655 case Intrinsic::vector_reduce_or:
1656 case Intrinsic::vector_reduce_xor:
1657 case Intrinsic::vector_reduce_smin:
1658 case Intrinsic::vector_reduce_smax:
1659 case Intrinsic::vector_reduce_umin:
1660 case Intrinsic::vector_reduce_umax:
1661 case Intrinsic::vector_extract:
1662 case Intrinsic::vector_insert:
1663 case Intrinsic::vector_interleave2:
1664 case Intrinsic::vector_interleave3:
1665 case Intrinsic::vector_interleave4:
1666 case Intrinsic::vector_interleave5:
1667 case Intrinsic::vector_interleave6:
1668 case Intrinsic::vector_interleave7:
1669 case Intrinsic::vector_interleave8:
1670 case Intrinsic::vector_deinterleave2:
1671 case Intrinsic::vector_deinterleave3:
1672 case Intrinsic::vector_deinterleave4:
1673 case Intrinsic::vector_deinterleave5:
1674 case Intrinsic::vector_deinterleave6:
1675 case Intrinsic::vector_deinterleave7:
1676 case Intrinsic::vector_deinterleave8:
1677 // Target intrinsics
1678 case Intrinsic::amdgcn_perm:
1679 case Intrinsic::amdgcn_wave_reduce_umin:
1680 case Intrinsic::amdgcn_wave_reduce_umax:
1681 case Intrinsic::amdgcn_wave_reduce_max:
1682 case Intrinsic::amdgcn_wave_reduce_min:
1683 case Intrinsic::amdgcn_wave_reduce_add:
1684 case Intrinsic::amdgcn_wave_reduce_sub:
1685 case Intrinsic::amdgcn_wave_reduce_and:
1686 case Intrinsic::amdgcn_wave_reduce_or:
1687 case Intrinsic::amdgcn_wave_reduce_xor:
1688 case Intrinsic::amdgcn_s_wqm:
1689 case Intrinsic::amdgcn_s_quadmask:
1690 case Intrinsic::amdgcn_s_bitreplicate:
1691 case Intrinsic::arm_mve_vctp8:
1692 case Intrinsic::arm_mve_vctp16:
1693 case Intrinsic::arm_mve_vctp32:
1694 case Intrinsic::arm_mve_vctp64:
1695 case Intrinsic::aarch64_sve_convert_from_svbool:
1696 case Intrinsic::wasm_alltrue:
1697 case Intrinsic::wasm_anytrue:
1698 case Intrinsic::wasm_dot:
1699 // WebAssembly float semantics are always known
1700 case Intrinsic::wasm_trunc_signed:
1701 case Intrinsic::wasm_trunc_unsigned:
1702 return true;
1703
1704 // Floating-point operations cannot be folded in strictfp functions in the
1705 // general case. They can be folded if the FP environment is known to the compiler.
1706 case Intrinsic::minnum:
1707 case Intrinsic::maxnum:
1708 case Intrinsic::minimum:
1709 case Intrinsic::maximum:
1710 case Intrinsic::minimumnum:
1711 case Intrinsic::maximumnum:
1712 case Intrinsic::log:
1713 case Intrinsic::log2:
1714 case Intrinsic::log10:
1715 case Intrinsic::exp:
1716 case Intrinsic::exp2:
1717 case Intrinsic::exp10:
1718 case Intrinsic::sqrt:
1719 case Intrinsic::sin:
1720 case Intrinsic::cos:
1721 case Intrinsic::sincos:
1722 case Intrinsic::sinh:
1723 case Intrinsic::cosh:
1724 case Intrinsic::atan:
1725 case Intrinsic::pow:
1726 case Intrinsic::powi:
1727 case Intrinsic::ldexp:
1728 case Intrinsic::fma:
1729 case Intrinsic::fmuladd:
1730 case Intrinsic::frexp:
1731 case Intrinsic::fptoui_sat:
1732 case Intrinsic::fptosi_sat:
1733 case Intrinsic::amdgcn_cos:
1734 case Intrinsic::amdgcn_cubeid:
1735 case Intrinsic::amdgcn_cubema:
1736 case Intrinsic::amdgcn_cubesc:
1737 case Intrinsic::amdgcn_cubetc:
1738 case Intrinsic::amdgcn_fmul_legacy:
1739 case Intrinsic::amdgcn_fma_legacy:
1740 case Intrinsic::amdgcn_fract:
1741 case Intrinsic::amdgcn_sin:
1742 // The intrinsics below depend on the rounding mode in MXCSR.
1743 case Intrinsic::x86_sse_cvtss2si:
1744 case Intrinsic::x86_sse_cvtss2si64:
1745 case Intrinsic::x86_sse_cvttss2si:
1746 case Intrinsic::x86_sse_cvttss2si64:
1747 case Intrinsic::x86_sse2_cvtsd2si:
1748 case Intrinsic::x86_sse2_cvtsd2si64:
1749 case Intrinsic::x86_sse2_cvttsd2si:
1750 case Intrinsic::x86_sse2_cvttsd2si64:
1751 case Intrinsic::x86_avx512_vcvtss2si32:
1752 case Intrinsic::x86_avx512_vcvtss2si64:
1753 case Intrinsic::x86_avx512_cvttss2si:
1754 case Intrinsic::x86_avx512_cvttss2si64:
1755 case Intrinsic::x86_avx512_vcvtsd2si32:
1756 case Intrinsic::x86_avx512_vcvtsd2si64:
1757 case Intrinsic::x86_avx512_cvttsd2si:
1758 case Intrinsic::x86_avx512_cvttsd2si64:
1759 case Intrinsic::x86_avx512_vcvtss2usi32:
1760 case Intrinsic::x86_avx512_vcvtss2usi64:
1761 case Intrinsic::x86_avx512_cvttss2usi:
1762 case Intrinsic::x86_avx512_cvttss2usi64:
1763 case Intrinsic::x86_avx512_vcvtsd2usi32:
1764 case Intrinsic::x86_avx512_vcvtsd2usi64:
1765 case Intrinsic::x86_avx512_cvttsd2usi:
1766 case Intrinsic::x86_avx512_cvttsd2usi64:
1767
1768 // NVVM FMax intrinsics
1769 case Intrinsic::nvvm_fmax_d:
1770 case Intrinsic::nvvm_fmax_f:
1771 case Intrinsic::nvvm_fmax_ftz_f:
1772 case Intrinsic::nvvm_fmax_ftz_nan_f:
1773 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
1774 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
1775 case Intrinsic::nvvm_fmax_nan_f:
1776 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
1777 case Intrinsic::nvvm_fmax_xorsign_abs_f:
1778
1779 // NVVM FMin intrinsics
1780 case Intrinsic::nvvm_fmin_d:
1781 case Intrinsic::nvvm_fmin_f:
1782 case Intrinsic::nvvm_fmin_ftz_f:
1783 case Intrinsic::nvvm_fmin_ftz_nan_f:
1784 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
1785 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
1786 case Intrinsic::nvvm_fmin_nan_f:
1787 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
1788 case Intrinsic::nvvm_fmin_xorsign_abs_f:
1789
1790 // NVVM float/double to int32/uint32 conversion intrinsics
1791 case Intrinsic::nvvm_f2i_rm:
1792 case Intrinsic::nvvm_f2i_rn:
1793 case Intrinsic::nvvm_f2i_rp:
1794 case Intrinsic::nvvm_f2i_rz:
1795 case Intrinsic::nvvm_f2i_rm_ftz:
1796 case Intrinsic::nvvm_f2i_rn_ftz:
1797 case Intrinsic::nvvm_f2i_rp_ftz:
1798 case Intrinsic::nvvm_f2i_rz_ftz:
1799 case Intrinsic::nvvm_f2ui_rm:
1800 case Intrinsic::nvvm_f2ui_rn:
1801 case Intrinsic::nvvm_f2ui_rp:
1802 case Intrinsic::nvvm_f2ui_rz:
1803 case Intrinsic::nvvm_f2ui_rm_ftz:
1804 case Intrinsic::nvvm_f2ui_rn_ftz:
1805 case Intrinsic::nvvm_f2ui_rp_ftz:
1806 case Intrinsic::nvvm_f2ui_rz_ftz:
1807 case Intrinsic::nvvm_d2i_rm:
1808 case Intrinsic::nvvm_d2i_rn:
1809 case Intrinsic::nvvm_d2i_rp:
1810 case Intrinsic::nvvm_d2i_rz:
1811 case Intrinsic::nvvm_d2ui_rm:
1812 case Intrinsic::nvvm_d2ui_rn:
1813 case Intrinsic::nvvm_d2ui_rp:
1814 case Intrinsic::nvvm_d2ui_rz:
1815
1816 // NVVM float/double to int64/uint64 conversion intrinsics
1817 case Intrinsic::nvvm_f2ll_rm:
1818 case Intrinsic::nvvm_f2ll_rn:
1819 case Intrinsic::nvvm_f2ll_rp:
1820 case Intrinsic::nvvm_f2ll_rz:
1821 case Intrinsic::nvvm_f2ll_rm_ftz:
1822 case Intrinsic::nvvm_f2ll_rn_ftz:
1823 case Intrinsic::nvvm_f2ll_rp_ftz:
1824 case Intrinsic::nvvm_f2ll_rz_ftz:
1825 case Intrinsic::nvvm_f2ull_rm:
1826 case Intrinsic::nvvm_f2ull_rn:
1827 case Intrinsic::nvvm_f2ull_rp:
1828 case Intrinsic::nvvm_f2ull_rz:
1829 case Intrinsic::nvvm_f2ull_rm_ftz:
1830 case Intrinsic::nvvm_f2ull_rn_ftz:
1831 case Intrinsic::nvvm_f2ull_rp_ftz:
1832 case Intrinsic::nvvm_f2ull_rz_ftz:
1833 case Intrinsic::nvvm_d2ll_rm:
1834 case Intrinsic::nvvm_d2ll_rn:
1835 case Intrinsic::nvvm_d2ll_rp:
1836 case Intrinsic::nvvm_d2ll_rz:
1837 case Intrinsic::nvvm_d2ull_rm:
1838 case Intrinsic::nvvm_d2ull_rn:
1839 case Intrinsic::nvvm_d2ull_rp:
1840 case Intrinsic::nvvm_d2ull_rz:
1841
1842 // NVVM math intrinsics:
1843 case Intrinsic::nvvm_ceil_d:
1844 case Intrinsic::nvvm_ceil_f:
1845 case Intrinsic::nvvm_ceil_ftz_f:
1846
1847 case Intrinsic::nvvm_fabs:
1848 case Intrinsic::nvvm_fabs_ftz:
1849
1850 case Intrinsic::nvvm_floor_d:
1851 case Intrinsic::nvvm_floor_f:
1852 case Intrinsic::nvvm_floor_ftz_f:
1853
1854 case Intrinsic::nvvm_rcp_rm_d:
1855 case Intrinsic::nvvm_rcp_rm_f:
1856 case Intrinsic::nvvm_rcp_rm_ftz_f:
1857 case Intrinsic::nvvm_rcp_rn_d:
1858 case Intrinsic::nvvm_rcp_rn_f:
1859 case Intrinsic::nvvm_rcp_rn_ftz_f:
1860 case Intrinsic::nvvm_rcp_rp_d:
1861 case Intrinsic::nvvm_rcp_rp_f:
1862 case Intrinsic::nvvm_rcp_rp_ftz_f:
1863 case Intrinsic::nvvm_rcp_rz_d:
1864 case Intrinsic::nvvm_rcp_rz_f:
1865 case Intrinsic::nvvm_rcp_rz_ftz_f:
1866
1867 case Intrinsic::nvvm_round_d:
1868 case Intrinsic::nvvm_round_f:
1869 case Intrinsic::nvvm_round_ftz_f:
1870
1871 case Intrinsic::nvvm_saturate_d:
1872 case Intrinsic::nvvm_saturate_f:
1873 case Intrinsic::nvvm_saturate_ftz_f:
1874
1875 case Intrinsic::nvvm_sqrt_f:
1876 case Intrinsic::nvvm_sqrt_rn_d:
1877 case Intrinsic::nvvm_sqrt_rn_f:
1878 case Intrinsic::nvvm_sqrt_rn_ftz_f:
1879 return !Call->isStrictFP();
1880
1881 // NVVM add intrinsics with explicit rounding modes
1882 case Intrinsic::nvvm_add_rm_d:
1883 case Intrinsic::nvvm_add_rn_d:
1884 case Intrinsic::nvvm_add_rp_d:
1885 case Intrinsic::nvvm_add_rz_d:
1886 case Intrinsic::nvvm_add_rm_f:
1887 case Intrinsic::nvvm_add_rn_f:
1888 case Intrinsic::nvvm_add_rp_f:
1889 case Intrinsic::nvvm_add_rz_f:
1890 case Intrinsic::nvvm_add_rm_ftz_f:
1891 case Intrinsic::nvvm_add_rn_ftz_f:
1892 case Intrinsic::nvvm_add_rp_ftz_f:
1893 case Intrinsic::nvvm_add_rz_ftz_f:
1894
1895 // NVVM div intrinsics with explicit rounding modes
1896 case Intrinsic::nvvm_div_rm_d:
1897 case Intrinsic::nvvm_div_rn_d:
1898 case Intrinsic::nvvm_div_rp_d:
1899 case Intrinsic::nvvm_div_rz_d:
1900 case Intrinsic::nvvm_div_rm_f:
1901 case Intrinsic::nvvm_div_rn_f:
1902 case Intrinsic::nvvm_div_rp_f:
1903 case Intrinsic::nvvm_div_rz_f:
1904 case Intrinsic::nvvm_div_rm_ftz_f:
1905 case Intrinsic::nvvm_div_rn_ftz_f:
1906 case Intrinsic::nvvm_div_rp_ftz_f:
1907 case Intrinsic::nvvm_div_rz_ftz_f:
1908
1909 // NVVM mul intrinsics with explicit rounding modes
1910 case Intrinsic::nvvm_mul_rm_d:
1911 case Intrinsic::nvvm_mul_rn_d:
1912 case Intrinsic::nvvm_mul_rp_d:
1913 case Intrinsic::nvvm_mul_rz_d:
1914 case Intrinsic::nvvm_mul_rm_f:
1915 case Intrinsic::nvvm_mul_rn_f:
1916 case Intrinsic::nvvm_mul_rp_f:
1917 case Intrinsic::nvvm_mul_rz_f:
1918 case Intrinsic::nvvm_mul_rm_ftz_f:
1919 case Intrinsic::nvvm_mul_rn_ftz_f:
1920 case Intrinsic::nvvm_mul_rp_ftz_f:
1921 case Intrinsic::nvvm_mul_rz_ftz_f:
1922
1923 // NVVM fma intrinsics with explicit rounding modes
1924 case Intrinsic::nvvm_fma_rm_d:
1925 case Intrinsic::nvvm_fma_rn_d:
1926 case Intrinsic::nvvm_fma_rp_d:
1927 case Intrinsic::nvvm_fma_rz_d:
1928 case Intrinsic::nvvm_fma_rm_f:
1929 case Intrinsic::nvvm_fma_rn_f:
1930 case Intrinsic::nvvm_fma_rp_f:
1931 case Intrinsic::nvvm_fma_rz_f:
1932 case Intrinsic::nvvm_fma_rm_ftz_f:
1933 case Intrinsic::nvvm_fma_rn_ftz_f:
1934 case Intrinsic::nvvm_fma_rp_ftz_f:
1935 case Intrinsic::nvvm_fma_rz_ftz_f:
1936
1937 // Sign operations are actually bitwise operations; they do not raise
1938 // exceptions even for SNaNs.
1939 case Intrinsic::fabs:
1940 case Intrinsic::copysign:
1941 case Intrinsic::is_fpclass:
1942 // Non-constrained variants of rounding operations assume the default FP
1943 // environment, so they can be folded in any case.
1944 case Intrinsic::ceil:
1945 case Intrinsic::floor:
1946 case Intrinsic::round:
1947 case Intrinsic::roundeven:
1948 case Intrinsic::trunc:
1949 case Intrinsic::nearbyint:
1950 case Intrinsic::rint:
1951 case Intrinsic::canonicalize:
1952
1953 // Constrained intrinsics can be folded if the FP environment is known
1954 // to the compiler.
1955 case Intrinsic::experimental_constrained_fma:
1956 case Intrinsic::experimental_constrained_fmuladd:
1957 case Intrinsic::experimental_constrained_fadd:
1958 case Intrinsic::experimental_constrained_fsub:
1959 case Intrinsic::experimental_constrained_fmul:
1960 case Intrinsic::experimental_constrained_fdiv:
1961 case Intrinsic::experimental_constrained_frem:
1962 case Intrinsic::experimental_constrained_ceil:
1963 case Intrinsic::experimental_constrained_floor:
1964 case Intrinsic::experimental_constrained_round:
1965 case Intrinsic::experimental_constrained_roundeven:
1966 case Intrinsic::experimental_constrained_trunc:
1967 case Intrinsic::experimental_constrained_nearbyint:
1968 case Intrinsic::experimental_constrained_rint:
1969 case Intrinsic::experimental_constrained_fcmp:
1970 case Intrinsic::experimental_constrained_fcmps:
1971 return true;
1972 default:
1973 return false;
1974 case Intrinsic::not_intrinsic: break;
1975 }
1976
1977 if (!F->hasName() || Call->isStrictFP())
1978 return false;
1979
1980 // In these cases, checking the length is required. We don't want to return
1981 // true for a name like "cos\0blah", which strcmp would report as equal to
1982 // "cos" even though it has length 8.
1983 StringRef Name = F->getName();
1984 switch (Name[0]) {
1985 default:
1986 return false;
1987 // clang-format off
1988 case 'a':
1989 return Name == "acos" || Name == "acosf" ||
1990 Name == "asin" || Name == "asinf" ||
1991 Name == "atan" || Name == "atanf" ||
1992 Name == "atan2" || Name == "atan2f";
1993 case 'c':
1994 return Name == "ceil" || Name == "ceilf" ||
1995 Name == "cos" || Name == "cosf" ||
1996 Name == "cosh" || Name == "coshf";
1997 case 'e':
1998 return Name == "exp" || Name == "expf" || Name == "exp2" ||
1999 Name == "exp2f" || Name == "erf" || Name == "erff";
2000 case 'f':
2001 return Name == "fabs" || Name == "fabsf" ||
2002 Name == "floor" || Name == "floorf" ||
2003 Name == "fmod" || Name == "fmodf";
2004 case 'i':
2005 return Name == "ilogb" || Name == "ilogbf";
2006 case 'l':
2007 return Name == "log" || Name == "logf" || Name == "logl" ||
2008 Name == "log2" || Name == "log2f" || Name == "log10" ||
2009 Name == "log10f" || Name == "logb" || Name == "logbf" ||
2010 Name == "log1p" || Name == "log1pf";
2011 case 'n':
2012 return Name == "nearbyint" || Name == "nearbyintf";
2013 case 'p':
2014 return Name == "pow" || Name == "powf";
2015 case 'r':
2016 return Name == "remainder" || Name == "remainderf" ||
2017 Name == "rint" || Name == "rintf" ||
2018 Name == "round" || Name == "roundf" ||
2019 Name == "roundeven" || Name == "roundevenf";
2020 case 's':
2021 return Name == "sin" || Name == "sinf" ||
2022 Name == "sinh" || Name == "sinhf" ||
2023 Name == "sqrt" || Name == "sqrtf";
2024 case 't':
2025 return Name == "tan" || Name == "tanf" ||
2026 Name == "tanh" || Name == "tanhf" ||
2027 Name == "trunc" || Name == "truncf";
2028 case '_':
2029 // Check for various function names that get used for the math functions
2030 // when the header files are preprocessed with the macro
2031 // __FINITE_MATH_ONLY__ enabled.
2032 // The '12' here is the length of the shortest name that can match.
2033 // We need to check the size before looking at Name[1] and Name[2]
2034 // so we may as well check a limit that will eliminate mismatches.
2035 if (Name.size() < 12 || Name[1] != '_')
2036 return false;
2037 switch (Name[2]) {
2038 default:
2039 return false;
2040 case 'a':
2041 return Name == "__acos_finite" || Name == "__acosf_finite" ||
2042 Name == "__asin_finite" || Name == "__asinf_finite" ||
2043 Name == "__atan2_finite" || Name == "__atan2f_finite";
2044 case 'c':
2045 return Name == "__cosh_finite" || Name == "__coshf_finite";
2046 case 'e':
2047 return Name == "__exp_finite" || Name == "__expf_finite" ||
2048 Name == "__exp2_finite" || Name == "__exp2f_finite";
2049 case 'l':
2050 return Name == "__log_finite" || Name == "__logf_finite" ||
2051 Name == "__log10_finite" || Name == "__log10f_finite";
2052 case 'p':
2053 return Name == "__pow_finite" || Name == "__powf_finite";
2054 case 's':
2055 return Name == "__sinh_finite" || Name == "__sinhf_finite";
2056 }
2057 // clang-format on
2058 }
2059}
2060
2061namespace {
2062
2063Constant *GetConstantFoldFPValue(double V, Type *Ty) {
2064 if (Ty->isHalfTy() || Ty->isFloatTy()) {
2065 APFloat APF(V);
2066 bool unused;
2067 APF.convert(ToSemantics: Ty->getFltSemantics(), RM: APFloat::rmNearestTiesToEven, losesInfo: &unused);
2068 return ConstantFP::get(Context&: Ty->getContext(), V: APF);
2069 }
2070 if (Ty->isDoubleTy())
2071 return ConstantFP::get(Context&: Ty->getContext(), V: APFloat(V));
2072 llvm_unreachable("Can only constant fold half/float/double");
2073}
2074
2075#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2076Constant *GetConstantFoldFPValue128(float128 V, Type *Ty) {
2077 if (Ty->isFP128Ty())
2078 return ConstantFP::get(Ty, V);
2079 llvm_unreachable("Can only constant fold fp128");
2080}
2081#endif
2082
2083/// Clear the floating-point exception state.
2084inline void llvm_fenv_clearexcept() {
2085#if HAVE_DECL_FE_ALL_EXCEPT
2086 feclearexcept(FE_ALL_EXCEPT);
2087#endif
2088 errno = 0;
2089}
2090
2091/// Test if a floating-point exception was raised.
2092inline bool llvm_fenv_testexcept() {
2093 int errno_val = errno;
2094 if (errno_val == ERANGE || errno_val == EDOM)
2095 return true;
2096#if HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
2097 if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
2098 return true;
2099#endif
2100 return false;
2101}
2102
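// Flush-to-zero helpers. Illustrative example: -1.0e-40f is a single-precision
// denormal; FTZPreserveSign maps it to -0.0f and FlushToPositiveZero maps it
// to +0.0f, while normal values pass through unchanged.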
2103static APFloat FTZPreserveSign(const APFloat &V) {
2104 if (V.isDenormal())
2105 return APFloat::getZero(Sem: V.getSemantics(), Negative: V.isNegative());
2106 return V;
2107}
2108
2109static APFloat FlushToPositiveZero(const APFloat &V) {
2110 if (V.isDenormal())
2111 return APFloat::getZero(Sem: V.getSemantics(), Negative: false);
2112 return V;
2113}
2114
2115static APFloat FlushWithDenormKind(const APFloat &V,
2116 DenormalMode::DenormalModeKind DenormKind) {
2117 assert(DenormKind != DenormalMode::DenormalModeKind::Invalid &&
2118 DenormKind != DenormalMode::DenormalModeKind::Dynamic);
2119 switch (DenormKind) {
2120 case DenormalMode::DenormalModeKind::IEEE:
2121 return V;
2122 case DenormalMode::DenormalModeKind::PreserveSign:
2123 return FTZPreserveSign(V);
2124 case DenormalMode::DenormalModeKind::PositiveZero:
2125 return FlushToPositiveZero(V);
2126 default:
2127 llvm_unreachable("Invalid denormal mode!");
2128 }
2129}
2130
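// Illustrative usage: ConstantFoldFP(sin, APFloat(0.5), FloatTy) evaluates
// sin(0.5) in host double precision and narrows the result back to float; it
// returns nullptr if the host libm reported an error via errno or raised an
// FP exception (FloatTy stands for a hypothetical float Type*).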
2131Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty,
2132 DenormalMode DenormMode = DenormalMode::getIEEE()) {
2133 if (!DenormMode.isValid() ||
2134 DenormMode.Input == DenormalMode::DenormalModeKind::Dynamic ||
2135 DenormMode.Output == DenormalMode::DenormalModeKind::Dynamic)
2136 return nullptr;
2137
2138 llvm_fenv_clearexcept();
2139 auto Input = FlushWithDenormKind(V, DenormKind: DenormMode.Input);
2140 double Result = NativeFP(Input.convertToDouble());
2141 if (llvm_fenv_testexcept()) {
2142 llvm_fenv_clearexcept();
2143 return nullptr;
2144 }
2145
2146 Constant *Output = GetConstantFoldFPValue(V: Result, Ty);
2147 if (DenormMode.Output == DenormalMode::DenormalModeKind::IEEE)
2148 return Output;
2149 const auto *CFP = static_cast<ConstantFP *>(Output);
2150 const auto Res = FlushWithDenormKind(V: CFP->getValueAPF(), DenormKind: DenormMode.Output);
2151 return ConstantFP::get(Context&: Ty->getContext(), V: Res);
2152}
2153
2154#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2155Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
2156 Type *Ty) {
2157 llvm_fenv_clearexcept();
2158 float128 Result = NativeFP(V.convertToQuad());
2159 if (llvm_fenv_testexcept()) {
2160 llvm_fenv_clearexcept();
2161 return nullptr;
2162 }
2163
2164 return GetConstantFoldFPValue128(V: Result, Ty);
2165}
2166#endif
2167
2168Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
2169 const APFloat &V, const APFloat &W, Type *Ty) {
2170 llvm_fenv_clearexcept();
2171 double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
2172 if (llvm_fenv_testexcept()) {
2173 llvm_fenv_clearexcept();
2174 return nullptr;
2175 }
2176
2177 return GetConstantFoldFPValue(V: Result, Ty);
2178}
2179
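// Illustrative example: vector_reduce_add(<4 x i32> <i32 1, i32 2, i32 3,
// i32 4>) folds to i32 10, and a splat input short-circuits the
// and/or/min/max reductions directly to the splat value.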
2180Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
2181 auto *OpVT = cast<VectorType>(Val: Op->getType());
2182
2183 // This is the same as the underlying binops - poison propagates.
2184 if (Op->containsPoisonElement())
2185 return PoisonValue::get(T: OpVT->getElementType());
2186
2187 // Shortcut non-accumulating reductions.
2188 if (Constant *SplatVal = Op->getSplatValue()) {
2189 switch (IID) {
2190 case Intrinsic::vector_reduce_and:
2191 case Intrinsic::vector_reduce_or:
2192 case Intrinsic::vector_reduce_smin:
2193 case Intrinsic::vector_reduce_smax:
2194 case Intrinsic::vector_reduce_umin:
2195 case Intrinsic::vector_reduce_umax:
2196 return SplatVal;
2197 case Intrinsic::vector_reduce_add:
2198 if (SplatVal->isNullValue())
2199 return SplatVal;
2200 break;
2201 case Intrinsic::vector_reduce_mul:
2202 if (SplatVal->isNullValue() || SplatVal->isOneValue())
2203 return SplatVal;
2204 break;
2205 case Intrinsic::vector_reduce_xor:
2206 if (SplatVal->isNullValue())
2207 return SplatVal;
2208 if (OpVT->getElementCount().isKnownMultipleOf(RHS: 2))
2209 return Constant::getNullValue(Ty: OpVT->getElementType());
2210 break;
2211 }
2212 }
2213
2214 FixedVectorType *VT = dyn_cast<FixedVectorType>(Val: OpVT);
2215 if (!VT)
2216 return nullptr;
2217
2218 // TODO: Handle undef.
2219 auto *EltC = dyn_cast_or_null<ConstantInt>(Val: Op->getAggregateElement(Elt: 0U));
2220 if (!EltC)
2221 return nullptr;
2222
2223 APInt Acc = EltC->getValue();
2224 for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
2225 if (!(EltC = dyn_cast_or_null<ConstantInt>(Val: Op->getAggregateElement(Elt: I))))
2226 return nullptr;
2227 const APInt &X = EltC->getValue();
2228 switch (IID) {
2229 case Intrinsic::vector_reduce_add:
2230 Acc = Acc + X;
2231 break;
2232 case Intrinsic::vector_reduce_mul:
2233 Acc = Acc * X;
2234 break;
2235 case Intrinsic::vector_reduce_and:
2236 Acc = Acc & X;
2237 break;
2238 case Intrinsic::vector_reduce_or:
2239 Acc = Acc | X;
2240 break;
2241 case Intrinsic::vector_reduce_xor:
2242 Acc = Acc ^ X;
2243 break;
2244 case Intrinsic::vector_reduce_smin:
2245 Acc = APIntOps::smin(A: Acc, B: X);
2246 break;
2247 case Intrinsic::vector_reduce_smax:
2248 Acc = APIntOps::smax(A: Acc, B: X);
2249 break;
2250 case Intrinsic::vector_reduce_umin:
2251 Acc = APIntOps::umin(A: Acc, B: X);
2252 break;
2253 case Intrinsic::vector_reduce_umax:
2254 Acc = APIntOps::umax(A: Acc, B: X);
2255 break;
2256 }
2257 }
2258
2259 return ConstantInt::get(Context&: Op->getContext(), V: Acc);
2260}
2261
2262/// Attempt to fold an SSE floating point to integer conversion of a constant
2263/// floating point. If roundTowardZero is false, the default IEEE rounding is
2264/// used (toward nearest, ties to even). This matches the behavior of the
2265/// non-truncating SSE instructions in the default rounding mode. The desired
2266/// integer type Ty is used to select how many bits are available for the
2267/// result. Returns null if the conversion cannot be performed, otherwise
2268/// returns the Constant value resulting from the conversion.
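/// Illustrative example: converting float 2.75 yields 2 with
/// roundTowardZero=true and 3 with the default ties-to-even rounding; a value
/// that does not fit in the result width makes this helper return null.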
2269Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
2270 Type *Ty, bool IsSigned) {
2271 // All of these conversion intrinsics form an integer of at most 64 bits.
2272 unsigned ResultWidth = Ty->getIntegerBitWidth();
2273 assert(ResultWidth <= 64 &&
2274 "Can only constant fold conversions to 64 and 32 bit ints");
2275
2276 uint64_t UIntVal;
2277 bool isExact = false;
2278 APFloat::roundingMode mode = roundTowardZero ? APFloat::rmTowardZero
2279 : APFloat::rmNearestTiesToEven;
2280 APFloat::opStatus status =
2281 Val.convertToInteger(Input: MutableArrayRef(UIntVal), Width: ResultWidth,
2282 IsSigned, RM: mode, IsExact: &isExact);
2283 if (status != APFloat::opOK &&
2284 (!roundTowardZero || status != APFloat::opInexact))
2285 return nullptr;
2286 return ConstantInt::get(Ty, V: UIntVal, IsSigned);
2287}
2288
2289double getValueAsDouble(ConstantFP *Op) {
2290 Type *Ty = Op->getType();
2291
2292 if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
2293 return Op->getValueAPF().convertToDouble();
2294
2295 bool unused;
2296 APFloat APF = Op->getValueAPF();
2297 APF.convert(ToSemantics: APFloat::IEEEdouble(), RM: APFloat::rmNearestTiesToEven, losesInfo: &unused);
2298 return APF.convertToDouble();
2299}
2300
2301static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
2302 if (auto *CI = dyn_cast<ConstantInt>(Val: Op)) {
2303 C = &CI->getValue();
2304 return true;
2305 }
2306 if (isa<UndefValue>(Val: Op)) {
2307 C = nullptr;
2308 return true;
2309 }
2310 return false;
2311}
2312
2313/// Checks if the given intrinsic call, which evaluates to a constant, is
2314/// allowed to be folded.
2315///
2316/// \param CI Constrained intrinsic call.
2317/// \param St Exception flags raised during constant evaluation.
2318static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
2319 APFloat::opStatus St) {
2320 std::optional<RoundingMode> ORM = CI->getRoundingMode();
2321 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2322
2323 // If the operation does not change exception status flags, it is safe
2324 // to fold.
2325 if (St == APFloat::opStatus::opOK)
2326 return true;
2327
2328 // If the evaluation raised an FP exception, the result can depend on the
2329 // rounding mode. If the latter is unknown, folding is not possible.
2330 if (ORM == RoundingMode::Dynamic)
2331 return false;
2332
2333 // If FP exceptions are ignored, fold the call, even if such an exception is
2334 // raised.
2335 if (EB && *EB != fp::ExceptionBehavior::ebStrict)
2336 return true;
2337
2338 // Leave the calculation for runtime so that the exception flags are set
2339 // correctly in hardware.
2340 return false;
2341}
2342
2343/// Returns the rounding mode that should be used for constant evaluation.
2344static RoundingMode
2345getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
2346 std::optional<RoundingMode> ORM = CI->getRoundingMode();
2347 if (!ORM || *ORM == RoundingMode::Dynamic)
2348 // Even if the rounding mode is unknown, try evaluating the operation.
2349 // If it does not raise an inexact exception, rounding was not applied,
2350 // so the result is exact and does not depend on the rounding mode. Whether
2351 // other FP exceptions are raised does not depend on the rounding mode either.
2352 return RoundingMode::NearestTiesToEven;
2353 return *ORM;
2354}
2355
2356/// Try to constant fold llvm.canonicalize for the given caller and value.
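/// Illustrative example: canonicalize(float -0.0) folds to -0.0 and
/// canonicalize(float 3.0) folds to 3.0, while a denormal input folds to a
/// correspondingly signed zero when the function's denormal mode is
/// "preserve-sign".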
2357static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,
2358 const APFloat &Src) {
2359 // Zero, positive and negative, is always OK to fold.
2360 if (Src.isZero()) {
2361 // Get a fresh 0, since ppc_fp128 does have non-canonical zeros.
2362 return ConstantFP::get(
2363 Context&: CI->getContext(),
2364 V: APFloat::getZero(Sem: Src.getSemantics(), Negative: Src.isNegative()));
2365 }
2366
2367 if (!Ty->isIEEELikeFPTy())
2368 return nullptr;
2369
2370 // Zero is always canonical and the sign must be preserved.
2371 //
2372 // Denorms and nans may have special encodings, but it should be OK to fold a
2373 // totally average number.
2374 if (Src.isNormal() || Src.isInfinity())
2375 return ConstantFP::get(Context&: CI->getContext(), V: Src);
2376
2377 if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {
2378 DenormalMode DenormMode =
2379 CI->getFunction()->getDenormalMode(FPType: Src.getSemantics());
2380
2381 if (DenormMode == DenormalMode::getIEEE())
2382 return ConstantFP::get(Context&: CI->getContext(), V: Src);
2383
2384 if (DenormMode.Input == DenormalMode::Dynamic)
2385 return nullptr;
2386
2387 // If we know if either input or output is flushed, we can fold.
2388 if ((DenormMode.Input == DenormalMode::Dynamic &&
2389 DenormMode.Output == DenormalMode::IEEE) ||
2390 (DenormMode.Input == DenormalMode::IEEE &&
2391 DenormMode.Output == DenormalMode::Dynamic))
2392 return nullptr;
2393
2394 bool IsPositive =
2395 (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero ||
2396 (DenormMode.Output == DenormalMode::PositiveZero &&
2397 DenormMode.Input == DenormalMode::IEEE));
2398
2399 return ConstantFP::get(Context&: CI->getContext(),
2400 V: APFloat::getZero(Sem: Src.getSemantics(), Negative: !IsPositive));
2401 }
2402
2403 return nullptr;
2404}
2405
2406static Constant *ConstantFoldScalarCall1(StringRef Name,
2407 Intrinsic::ID IntrinsicID,
2408 Type *Ty,
2409 ArrayRef<Constant *> Operands,
2410 const TargetLibraryInfo *TLI,
2411 const CallBase *Call) {
2412 assert(Operands.size() == 1 && "Wrong number of operands.");
2413
2414 if (IntrinsicID == Intrinsic::is_constant) {
2415 // We know we have a "Constant" argument. But we want to only
2416 // return true for manifest constants, not those that depend on
2417 // constants with unknowable values, e.g. GlobalValue or BlockAddress.
2418 if (Operands[0]->isManifestConstant())
2419 return ConstantInt::getTrue(Context&: Ty->getContext());
2420 return nullptr;
2421 }
2422
2423 if (isa<UndefValue>(Val: Operands[0])) {
2424 // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
2425 // ctpop() is between 0 and bitwidth, pick 0 for undef.
2426 // fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
2427 if (IntrinsicID == Intrinsic::cos ||
2428 IntrinsicID == Intrinsic::ctpop ||
2429 IntrinsicID == Intrinsic::fptoui_sat ||
2430 IntrinsicID == Intrinsic::fptosi_sat ||
2431 IntrinsicID == Intrinsic::canonicalize)
2432 return Constant::getNullValue(Ty);
2433 if (IntrinsicID == Intrinsic::bswap ||
2434 IntrinsicID == Intrinsic::bitreverse ||
2435 IntrinsicID == Intrinsic::launder_invariant_group ||
2436 IntrinsicID == Intrinsic::strip_invariant_group)
2437 return Operands[0];
2438 }
2439
2440 if (isa<ConstantPointerNull>(Val: Operands[0])) {
2441 // launder(null) == null == strip(null) iff in addrspace 0
2442 if (IntrinsicID == Intrinsic::launder_invariant_group ||
2443 IntrinsicID == Intrinsic::strip_invariant_group) {
2444 // If the instruction has not yet been inserted into a basic block (e.g. when
2445 // cloning a function during inlining), Call's caller may not be available.
2446 // So check Call's parent BB first before querying Call->getCaller().
2447 const Function *Caller =
2448 Call->getParent() ? Call->getCaller() : nullptr;
2449 if (Caller &&
2450 !NullPointerIsDefined(
2451 F: Caller, AS: Operands[0]->getType()->getPointerAddressSpace())) {
2452 return Operands[0];
2453 }
2454 return nullptr;
2455 }
2456 }
2457
2458 if (auto *Op = dyn_cast<ConstantFP>(Val: Operands[0])) {
2459 APFloat U = Op->getValueAPF();
2460
2461 if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
2462 IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
2463 bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;
2464
2465 if (U.isNaN())
2466 return nullptr;
2467
2468 unsigned Width = Ty->getIntegerBitWidth();
2469 APSInt Int(Width, !Signed);
2470 bool IsExact = false;
2471 APFloat::opStatus Status =
2472 U.convertToInteger(Result&: Int, RM: APFloat::rmTowardZero, IsExact: &IsExact);
2473
2474 if (Status == APFloat::opOK || Status == APFloat::opInexact)
2475 return ConstantInt::get(Ty, V: Int);
2476
2477 return nullptr;
2478 }
2479
2480 if (IntrinsicID == Intrinsic::fptoui_sat ||
2481 IntrinsicID == Intrinsic::fptosi_sat) {
2482 // convertToInteger() already has the desired saturation semantics.
2483 APSInt Int(Ty->getIntegerBitWidth(),
2484 IntrinsicID == Intrinsic::fptoui_sat);
2485 bool IsExact;
2486 U.convertToInteger(Result&: Int, RM: APFloat::rmTowardZero, IsExact: &IsExact);
2487 return ConstantInt::get(Ty, V: Int);
2488 }
2489
2490 if (IntrinsicID == Intrinsic::canonicalize)
2491 return constantFoldCanonicalize(Ty, CI: Call, Src: U);
2492
2493#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
2494 if (Ty->isFP128Ty()) {
2495 if (IntrinsicID == Intrinsic::log) {
2496 float128 Result = logf128(Op->getValueAPF().convertToQuad());
2497 return GetConstantFoldFPValue128(V: Result, Ty);
2498 }
2499
2500 LibFunc Fp128Func = NotLibFunc;
2501 if (TLI && TLI->getLibFunc(funcName: Name, F&: Fp128Func) && TLI->has(F: Fp128Func) &&
2502 Fp128Func == LibFunc_logl)
2503 return ConstantFoldFP128(logf128, Op->getValueAPF(), Ty);
2504 }
2505#endif
2506
2507 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy() &&
2508 !Ty->isIntegerTy())
2509 return nullptr;
2510
2511 // Use internal versions of these intrinsics.
2512
2513 if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
2515 U.roundToIntegral(RM: APFloat::rmNearestTiesToEven);
2516 return ConstantFP::get(Ty, V: U);
2517 }
2518
2519 if (IntrinsicID == Intrinsic::round) {
2520 U.roundToIntegral(RM: APFloat::rmNearestTiesToAway);
2521 return ConstantFP::get(Ty, V: U);
2522 }
2523
2524 if (IntrinsicID == Intrinsic::roundeven) {
2525 U.roundToIntegral(RM: APFloat::rmNearestTiesToEven);
2526 return ConstantFP::get(Ty, V: U);
2527 }
2528
2529 if (IntrinsicID == Intrinsic::ceil) {
2530 U.roundToIntegral(RM: APFloat::rmTowardPositive);
2531 return ConstantFP::get(Ty, V: U);
2532 }
2533
2534 if (IntrinsicID == Intrinsic::floor) {
2535 U.roundToIntegral(RM: APFloat::rmTowardNegative);
2536 return ConstantFP::get(Ty, V: U);
2537 }
2538
2539 if (IntrinsicID == Intrinsic::trunc) {
2540 U.roundToIntegral(RM: APFloat::rmTowardZero);
2541 return ConstantFP::get(Ty, V: U);
2542 }
2543
2544 if (IntrinsicID == Intrinsic::fabs) {
2545 U.clearSign();
2546 return ConstantFP::get(Ty, V: U);
2547 }
2548
2549 if (IntrinsicID == Intrinsic::amdgcn_fract) {
2550 // The v_fract instruction behaves like the OpenCL spec, which defines
2551 // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
2552 // there to prevent fract(-small) from returning 1.0. It returns the
2553 // largest positive floating-point number less than 1.0."
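      // e.g. fract(-0.1) folds to roughly 0.9, and fract(-1.0e-8) folds to
      // 0x1.fffffep-1 rather than 1.0 (illustrative).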
2554 APFloat FloorU(U);
2555 FloorU.roundToIntegral(RM: APFloat::rmTowardNegative);
2556 APFloat FractU(U - FloorU);
2557 APFloat AlmostOne(U.getSemantics(), 1);
2558 AlmostOne.next(/*nextDown*/ true);
2559 return ConstantFP::get(Ty, V: minimum(A: FractU, B: AlmostOne));
2560 }
2561
2562 // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
2563 // raise FP exceptions, unless the argument is a signaling NaN.
2564
2565 std::optional<APFloat::roundingMode> RM;
2566 switch (IntrinsicID) {
2567 default:
2568 break;
2569 case Intrinsic::experimental_constrained_nearbyint:
2570 case Intrinsic::experimental_constrained_rint: {
2571 auto CI = cast<ConstrainedFPIntrinsic>(Val: Call);
2572 RM = CI->getRoundingMode();
2573 if (!RM || *RM == RoundingMode::Dynamic)
2574 return nullptr;
2575 break;
2576 }
2577 case Intrinsic::experimental_constrained_round:
2578 RM = APFloat::rmNearestTiesToAway;
2579 break;
2580 case Intrinsic::experimental_constrained_ceil:
2581 RM = APFloat::rmTowardPositive;
2582 break;
2583 case Intrinsic::experimental_constrained_floor:
2584 RM = APFloat::rmTowardNegative;
2585 break;
2586 case Intrinsic::experimental_constrained_trunc:
2587 RM = APFloat::rmTowardZero;
2588 break;
2589 }
2590 if (RM) {
2591 auto CI = cast<ConstrainedFPIntrinsic>(Val: Call);
2592 if (U.isFinite()) {
2593 APFloat::opStatus St = U.roundToIntegral(RM: *RM);
2594 if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
2595 St == APFloat::opInexact) {
2596 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2597 if (EB == fp::ebStrict)
2598 return nullptr;
2599 }
2600 } else if (U.isSignaling()) {
2601 std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
2602 if (EB && *EB != fp::ebIgnore)
2603 return nullptr;
2604 U = APFloat::getQNaN(Sem: U.getSemantics());
2605 }
2606 return ConstantFP::get(Ty, V: U);
2607 }
2608
2609 // NVVM float/double to signed/unsigned int32/int64 conversions:
2610 switch (IntrinsicID) {
2611 // f2i
2612 case Intrinsic::nvvm_f2i_rm:
2613 case Intrinsic::nvvm_f2i_rn:
2614 case Intrinsic::nvvm_f2i_rp:
2615 case Intrinsic::nvvm_f2i_rz:
2616 case Intrinsic::nvvm_f2i_rm_ftz:
2617 case Intrinsic::nvvm_f2i_rn_ftz:
2618 case Intrinsic::nvvm_f2i_rp_ftz:
2619 case Intrinsic::nvvm_f2i_rz_ftz:
2620 // f2ui
2621 case Intrinsic::nvvm_f2ui_rm:
2622 case Intrinsic::nvvm_f2ui_rn:
2623 case Intrinsic::nvvm_f2ui_rp:
2624 case Intrinsic::nvvm_f2ui_rz:
2625 case Intrinsic::nvvm_f2ui_rm_ftz:
2626 case Intrinsic::nvvm_f2ui_rn_ftz:
2627 case Intrinsic::nvvm_f2ui_rp_ftz:
2628 case Intrinsic::nvvm_f2ui_rz_ftz:
2629 // d2i
2630 case Intrinsic::nvvm_d2i_rm:
2631 case Intrinsic::nvvm_d2i_rn:
2632 case Intrinsic::nvvm_d2i_rp:
2633 case Intrinsic::nvvm_d2i_rz:
2634 // d2ui
2635 case Intrinsic::nvvm_d2ui_rm:
2636 case Intrinsic::nvvm_d2ui_rn:
2637 case Intrinsic::nvvm_d2ui_rp:
2638 case Intrinsic::nvvm_d2ui_rz:
2639 // f2ll
2640 case Intrinsic::nvvm_f2ll_rm:
2641 case Intrinsic::nvvm_f2ll_rn:
2642 case Intrinsic::nvvm_f2ll_rp:
2643 case Intrinsic::nvvm_f2ll_rz:
2644 case Intrinsic::nvvm_f2ll_rm_ftz:
2645 case Intrinsic::nvvm_f2ll_rn_ftz:
2646 case Intrinsic::nvvm_f2ll_rp_ftz:
2647 case Intrinsic::nvvm_f2ll_rz_ftz:
2648 // f2ull
2649 case Intrinsic::nvvm_f2ull_rm:
2650 case Intrinsic::nvvm_f2ull_rn:
2651 case Intrinsic::nvvm_f2ull_rp:
2652 case Intrinsic::nvvm_f2ull_rz:
2653 case Intrinsic::nvvm_f2ull_rm_ftz:
2654 case Intrinsic::nvvm_f2ull_rn_ftz:
2655 case Intrinsic::nvvm_f2ull_rp_ftz:
2656 case Intrinsic::nvvm_f2ull_rz_ftz:
2657 // d2ll
2658 case Intrinsic::nvvm_d2ll_rm:
2659 case Intrinsic::nvvm_d2ll_rn:
2660 case Intrinsic::nvvm_d2ll_rp:
2661 case Intrinsic::nvvm_d2ll_rz:
2662 // d2ull
2663 case Intrinsic::nvvm_d2ull_rm:
2664 case Intrinsic::nvvm_d2ull_rn:
2665 case Intrinsic::nvvm_d2ull_rp:
2666 case Intrinsic::nvvm_d2ull_rz: {
2667 // NaN inputs require special handling in float-to-integer conversions.
2668 if (U.isNaN()) {
2669 // In float-to-integer conversion, NaN inputs are converted to 0
2670 // when the source and destination bitwidths are both less than 64.
2671 if (nvvm::FPToIntegerIntrinsicNaNZero(IntrinsicID))
2672 return ConstantInt::get(Ty, V: 0);
2673
2674 // Otherwise, the most significant bit is set.
2675 unsigned BitWidth = Ty->getIntegerBitWidth();
2676 uint64_t Val = 1ULL << (BitWidth - 1);
2677 return ConstantInt::get(Ty, V: APInt(BitWidth, Val, /*IsSigned=*/false));
2678 }
2679
2680 APFloat::roundingMode RMode =
2681 nvvm::GetFPToIntegerRoundingMode(IntrinsicID);
2682 bool IsFTZ = nvvm::FPToIntegerIntrinsicShouldFTZ(IntrinsicID);
2683 bool IsSigned = nvvm::FPToIntegerIntrinsicResultIsSigned(IntrinsicID);
2684
2685 APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
2686 auto FloatToRound = IsFTZ ? FTZPreserveSign(V: U) : U;
2687
2688 // Return max/min value for integers if the result is +/-inf or
2689 // is too large to fit in the result's integer bitwidth.
2690 bool IsExact = false;
2691 FloatToRound.convertToInteger(Result&: ResInt, RM: RMode, IsExact: &IsExact);
2692 return ConstantInt::get(Ty, V: ResInt);
2693 }
2694 }
2695
2696 /// We only fold functions with finite arguments. Folding NaN and inf is
2697 /// likely to be aborted with an exception anyway, and some host libms
2698 /// have known errors raising exceptions.
2699 if (!U.isFinite())
2700 return nullptr;
2701
2702 /// Currently APFloat versions of these functions do not exist, so we use
2703 /// the host native double versions. Float versions are not called
2704 /// directly, but for all of these functions it holds that
2705 /// (float)(f((double)arg)) == f(arg). Long double is not supported yet.
2706 const APFloat &APF = Op->getValueAPF();
2707
2708 switch (IntrinsicID) {
2709 default: break;
2710 case Intrinsic::log:
2711 if (U.isZero())
2712 return ConstantFP::getInfinity(Ty, Negative: true);
2713 if (U.isNegative())
2714 return ConstantFP::getNaN(Ty);
2715 if (U.isExactlyValue(V: 1.0))
2716 return ConstantFP::getZero(Ty);
2717 return ConstantFoldFP(NativeFP: log, V: APF, Ty);
2718 case Intrinsic::log2:
2719 if (U.isZero())
2720 return ConstantFP::getInfinity(Ty, Negative: true);
2721 if (U.isNegative())
2722 return ConstantFP::getNaN(Ty);
2723 if (U.isExactlyValue(V: 1.0))
2724 return ConstantFP::getZero(Ty);
2725 // TODO: What about hosts that lack a C99 library?
2726 return ConstantFoldFP(NativeFP: log2, V: APF, Ty);
2727 case Intrinsic::log10:
2728 if (U.isZero())
2729 return ConstantFP::getInfinity(Ty, Negative: true);
2730 if (U.isNegative())
2731 return ConstantFP::getNaN(Ty);
2732 if (U.isExactlyValue(V: 1.0))
2733 return ConstantFP::getZero(Ty);
2734 // TODO: What about hosts that lack a C99 library?
2735 return ConstantFoldFP(NativeFP: log10, V: APF, Ty);
2736 case Intrinsic::exp:
2737 return ConstantFoldFP(NativeFP: exp, V: APF, Ty);
2738 case Intrinsic::exp2:
2739 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
2740 return ConstantFoldBinaryFP(NativeFP: pow, V: APFloat(2.0), W: APF, Ty);
2741 case Intrinsic::exp10:
2742 // Fold exp10(x) as pow(10, x), in case the host lacks a C99 library.
2743 return ConstantFoldBinaryFP(NativeFP: pow, V: APFloat(10.0), W: APF, Ty);
2744 case Intrinsic::sin:
2745 return ConstantFoldFP(NativeFP: sin, V: APF, Ty);
2746 case Intrinsic::cos:
2747 return ConstantFoldFP(NativeFP: cos, V: APF, Ty);
2748 case Intrinsic::sinh:
2749 return ConstantFoldFP(NativeFP: sinh, V: APF, Ty);
2750 case Intrinsic::cosh:
2751 return ConstantFoldFP(NativeFP: cosh, V: APF, Ty);
2752 case Intrinsic::atan:
2753 // Implement optional behavior from C's Annex F for +/-0.0.
2754 if (U.isZero())
2755 return ConstantFP::get(Ty, V: U);
2756 return ConstantFoldFP(NativeFP: atan, V: APF, Ty);
2757 case Intrinsic::sqrt:
2758 return ConstantFoldFP(NativeFP: sqrt, V: APF, Ty);
2759
2760 // NVVM Intrinsics:
2761 case Intrinsic::nvvm_ceil_ftz_f:
2762 case Intrinsic::nvvm_ceil_f:
2763 case Intrinsic::nvvm_ceil_d:
2764 return ConstantFoldFP(
2765 NativeFP: ceil, V: APF, Ty,
2766 DenormMode: nvvm::GetNVVMDenormMode(
2767 ShouldFTZ: nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2768
2769 case Intrinsic::nvvm_fabs_ftz:
2770 case Intrinsic::nvvm_fabs:
2771 return ConstantFoldFP(
2772 NativeFP: fabs, V: APF, Ty,
2773 DenormMode: nvvm::GetNVVMDenormMode(
2774 ShouldFTZ: nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2775
2776 case Intrinsic::nvvm_floor_ftz_f:
2777 case Intrinsic::nvvm_floor_f:
2778 case Intrinsic::nvvm_floor_d:
2779 return ConstantFoldFP(
2780 NativeFP: floor, V: APF, Ty,
2781 DenormMode: nvvm::GetNVVMDenormMode(
2782 ShouldFTZ: nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2783
2784 case Intrinsic::nvvm_rcp_rm_ftz_f:
2785 case Intrinsic::nvvm_rcp_rn_ftz_f:
2786 case Intrinsic::nvvm_rcp_rp_ftz_f:
2787 case Intrinsic::nvvm_rcp_rz_ftz_f:
2788 case Intrinsic::nvvm_rcp_rm_d:
2789 case Intrinsic::nvvm_rcp_rm_f:
2790 case Intrinsic::nvvm_rcp_rn_d:
2791 case Intrinsic::nvvm_rcp_rn_f:
2792 case Intrinsic::nvvm_rcp_rp_d:
2793 case Intrinsic::nvvm_rcp_rp_f:
2794 case Intrinsic::nvvm_rcp_rz_d:
2795 case Intrinsic::nvvm_rcp_rz_f: {
2796 APFloat::roundingMode RoundMode = nvvm::GetRCPRoundingMode(IntrinsicID);
2797 bool IsFTZ = nvvm::RCPShouldFTZ(IntrinsicID);
2798
2799 auto Denominator = IsFTZ ? FTZPreserveSign(V: APF) : APF;
2800 APFloat Res = APFloat::getOne(Sem: APF.getSemantics());
2801 APFloat::opStatus Status = Res.divide(RHS: Denominator, RM: RoundMode);
2802
2803 if (Status == APFloat::opOK || Status == APFloat::opInexact) {
2804 if (IsFTZ)
2805 Res = FTZPreserveSign(V: Res);
2806 return ConstantFP::get(Ty, V: Res);
2807 }
2808 return nullptr;
2809 }
2810
2811 case Intrinsic::nvvm_round_ftz_f:
2812 case Intrinsic::nvvm_round_f:
2813 case Intrinsic::nvvm_round_d: {
2814 // nvvm_round is lowered to PTX cvt.rni, which rounds to the nearest
2815 // integer, choosing the even integer if the source is equidistant between
2816 // two integers, so the semantics are closer to "rint" than to "round".
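      // e.g. 2.5 rounds to 2.0 and 3.5 rounds to 4.0 (illustrative).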
2817 bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
2818 auto V = IsFTZ ? FTZPreserveSign(V: APF) : APF;
2819 V.roundToIntegral(RM: APFloat::rmNearestTiesToEven);
2820 return ConstantFP::get(Ty, V);
2821 }
2822
2823 case Intrinsic::nvvm_saturate_ftz_f:
2824 case Intrinsic::nvvm_saturate_d:
2825 case Intrinsic::nvvm_saturate_f: {
2826 bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID);
2827 auto V = IsFTZ ? FTZPreserveSign(V: APF) : APF;
2828 if (V.isNegative() || V.isZero() || V.isNaN())
2829 return ConstantFP::getZero(Ty);
2830 APFloat One = APFloat::getOne(Sem: APF.getSemantics());
2831 if (V > One)
2832 return ConstantFP::get(Ty, V: One);
2833 return ConstantFP::get(Ty, V);
2834 }
2835
2836 case Intrinsic::nvvm_sqrt_rn_ftz_f:
2837 case Intrinsic::nvvm_sqrt_f:
2838 case Intrinsic::nvvm_sqrt_rn_d:
2839 case Intrinsic::nvvm_sqrt_rn_f:
2840 if (APF.isNegative())
2841 return nullptr;
2842 return ConstantFoldFP(
2843 NativeFP: sqrt, V: APF, Ty,
2844 DenormMode: nvvm::GetNVVMDenormMode(
2845 ShouldFTZ: nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID)));
2846
2847 // AMDGCN Intrinsics:
2848 case Intrinsic::amdgcn_cos:
2849 case Intrinsic::amdgcn_sin: {
2850 double V = getValueAsDouble(Op);
2851 if (V < -256.0 || V > 256.0)
2852 // The gfx8 and gfx9 architectures handle arguments outside the range
2853 // [-256, 256] differently. This should be a rare case so bail out
2854 // rather than trying to handle the difference.
2855 return nullptr;
2856 bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
2857 double V4 = V * 4.0;
2858 if (V4 == floor(x: V4)) {
2859 // Force exact results for quarter-integer inputs.
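      // e.g. the operand is measured in full rotations, so amdgcn.sin(0.25)
      // folds to exactly 1.0 and amdgcn.cos(0.25) folds to exactly 0.0
      // (illustrative).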
2860 const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
2861 V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
2862 } else {
2863 if (IsCos)
2864 V = cos(x: V * 2.0 * numbers::pi);
2865 else
2866 V = sin(x: V * 2.0 * numbers::pi);
2867 }
2868 return GetConstantFoldFPValue(V, Ty);
2869 }
2870 }
2871
2872 if (!TLI)
2873 return nullptr;
2874
2875 LibFunc Func = NotLibFunc;
2876 if (!TLI->getLibFunc(funcName: Name, F&: Func))
2877 return nullptr;
2878
2879 switch (Func) {
2880 default:
2881 break;
2882 case LibFunc_acos:
2883 case LibFunc_acosf:
2884 case LibFunc_acos_finite:
2885 case LibFunc_acosf_finite:
2886 if (TLI->has(F: Func))
2887 return ConstantFoldFP(NativeFP: acos, V: APF, Ty);
2888 break;
2889 case LibFunc_asin:
2890 case LibFunc_asinf:
2891 case LibFunc_asin_finite:
2892 case LibFunc_asinf_finite:
2893 if (TLI->has(F: Func))
2894 return ConstantFoldFP(NativeFP: asin, V: APF, Ty);
2895 break;
2896 case LibFunc_atan:
2897 case LibFunc_atanf:
2898 // Implement optional behavior from C's Annex F for +/-0.0.
2899 if (U.isZero())
2900 return ConstantFP::get(Ty, V: U);
2901 if (TLI->has(F: Func))
2902 return ConstantFoldFP(NativeFP: atan, V: APF, Ty);
2903 break;
2904 case LibFunc_ceil:
2905 case LibFunc_ceilf:
2906 if (TLI->has(F: Func)) {
2907 U.roundToIntegral(RM: APFloat::rmTowardPositive);
2908 return ConstantFP::get(Ty, V: U);
2909 }
2910 break;
2911 case LibFunc_cos:
2912 case LibFunc_cosf:
2913 if (TLI->has(F: Func))
2914 return ConstantFoldFP(NativeFP: cos, V: APF, Ty);
2915 break;
2916 case LibFunc_cosh:
2917 case LibFunc_coshf:
2918 case LibFunc_cosh_finite:
2919 case LibFunc_coshf_finite:
2920 if (TLI->has(F: Func))
2921 return ConstantFoldFP(NativeFP: cosh, V: APF, Ty);
2922 break;
2923 case LibFunc_exp:
2924 case LibFunc_expf:
2925 case LibFunc_exp_finite:
2926 case LibFunc_expf_finite:
2927 if (TLI->has(F: Func))
2928 return ConstantFoldFP(NativeFP: exp, V: APF, Ty);
2929 break;
2930 case LibFunc_exp2:
2931 case LibFunc_exp2f:
2932 case LibFunc_exp2_finite:
2933 case LibFunc_exp2f_finite:
2934 if (TLI->has(F: Func))
2935 // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
2936 return ConstantFoldBinaryFP(NativeFP: pow, V: APFloat(2.0), W: APF, Ty);
2937 break;
2938 case LibFunc_fabs:
2939 case LibFunc_fabsf:
2940 if (TLI->has(F: Func)) {
2941 U.clearSign();
2942 return ConstantFP::get(Ty, V: U);
2943 }
2944 break;
2945 case LibFunc_floor:
2946 case LibFunc_floorf:
2947 if (TLI->has(F: Func)) {
2948 U.roundToIntegral(RM: APFloat::rmTowardNegative);
2949 return ConstantFP::get(Ty, V: U);
2950 }
2951 break;
2952 case LibFunc_log:
2953 case LibFunc_logf:
2954 case LibFunc_log_finite:
2955 case LibFunc_logf_finite:
2956 if (!APF.isNegative() && !APF.isZero() && TLI->has(F: Func))
2957 return ConstantFoldFP(NativeFP: log, V: APF, Ty);
2958 break;
2959 case LibFunc_log2:
2960 case LibFunc_log2f:
2961 case LibFunc_log2_finite:
2962 case LibFunc_log2f_finite:
2963 if (!APF.isNegative() && !APF.isZero() && TLI->has(F: Func))
2964 // TODO: What about hosts that lack a C99 library?
2965 return ConstantFoldFP(NativeFP: log2, V: APF, Ty);
2966 break;
2967 case LibFunc_log10:
2968 case LibFunc_log10f:
2969 case LibFunc_log10_finite:
2970 case LibFunc_log10f_finite:
2971 if (!APF.isNegative() && !APF.isZero() && TLI->has(F: Func))
2972 // TODO: What about hosts that lack a C99 library?
2973 return ConstantFoldFP(NativeFP: log10, V: APF, Ty);
2974 break;
2975 case LibFunc_ilogb:
2976 case LibFunc_ilogbf:
2977 if (!APF.isZero() && TLI->has(F: Func))
2978 return ConstantInt::get(Ty, V: ilogb(Arg: APF), IsSigned: true);
2979 break;
2980 case LibFunc_logb:
2981 case LibFunc_logbf:
2982 if (!APF.isZero() && TLI->has(F: Func))
2983 return ConstantFoldFP(NativeFP: logb, V: APF, Ty);
2984 break;
2985 case LibFunc_log1p:
2986 case LibFunc_log1pf:
2987 // Implement optional behavior from C's Annex F for +/-0.0.
2988 if (U.isZero())
2989 return ConstantFP::get(Ty, V: U);
2990 if (APF > APFloat::getOne(Sem: APF.getSemantics(), Negative: true) && TLI->has(F: Func))
2991 return ConstantFoldFP(NativeFP: log1p, V: APF, Ty);
2992 break;
2993 case LibFunc_logl:
2994 return nullptr;
2995 case LibFunc_erf:
2996 case LibFunc_erff:
2997 if (TLI->has(F: Func))
2998 return ConstantFoldFP(NativeFP: erf, V: APF, Ty);
2999 break;
3000 case LibFunc_nearbyint:
3001 case LibFunc_nearbyintf:
3002 case LibFunc_rint:
3003 case LibFunc_rintf:
3004 case LibFunc_roundeven:
3005 case LibFunc_roundevenf:
3006 if (TLI->has(F: Func)) {
3007 U.roundToIntegral(RM: APFloat::rmNearestTiesToEven);
3008 return ConstantFP::get(Ty, V: U);
3009 }
3010 break;
3011 case LibFunc_round:
3012 case LibFunc_roundf:
3013 if (TLI->has(F: Func)) {
3014 U.roundToIntegral(RM: APFloat::rmNearestTiesToAway);
3015 return ConstantFP::get(Ty, V: U);
3016 }
3017 break;
3018 case LibFunc_sin:
3019 case LibFunc_sinf:
3020 if (TLI->has(F: Func))
3021 return ConstantFoldFP(NativeFP: sin, V: APF, Ty);
3022 break;
3023 case LibFunc_sinh:
3024 case LibFunc_sinhf:
3025 case LibFunc_sinh_finite:
3026 case LibFunc_sinhf_finite:
3027 if (TLI->has(F: Func))
3028 return ConstantFoldFP(NativeFP: sinh, V: APF, Ty);
3029 break;
3030 case LibFunc_sqrt:
3031 case LibFunc_sqrtf:
3032 if (!APF.isNegative() && TLI->has(F: Func))
3033 return ConstantFoldFP(NativeFP: sqrt, V: APF, Ty);
3034 break;
3035 case LibFunc_tan:
3036 case LibFunc_tanf:
3037 if (TLI->has(F: Func))
3038 return ConstantFoldFP(NativeFP: tan, V: APF, Ty);
3039 break;
3040 case LibFunc_tanh:
3041 case LibFunc_tanhf:
3042 if (TLI->has(F: Func))
3043 return ConstantFoldFP(NativeFP: tanh, V: APF, Ty);
3044 break;
3045 case LibFunc_trunc:
3046 case LibFunc_truncf:
3047 if (TLI->has(F: Func)) {
3048 U.roundToIntegral(RM: APFloat::rmTowardZero);
3049 return ConstantFP::get(Ty, V: U);
3050 }
3051 break;
3052 }
3053 return nullptr;
3054 }
3055
3056 if (auto *Op = dyn_cast<ConstantInt>(Val: Operands[0])) {
3057 switch (IntrinsicID) {
3058 case Intrinsic::bswap:
3059 return ConstantInt::get(Context&: Ty->getContext(), V: Op->getValue().byteSwap());
3060 case Intrinsic::ctpop:
3061 return ConstantInt::get(Ty, V: Op->getValue().popcount());
3062 case Intrinsic::bitreverse:
3063 return ConstantInt::get(Context&: Ty->getContext(), V: Op->getValue().reverseBits());
3064 case Intrinsic::amdgcn_s_wqm: {
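      // Whole quad mode: if any bit in a group of four is set in the input,
      // set all four bits of that group in the result.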
3065 uint64_t Val = Op->getZExtValue();
3066 Val |= (Val & 0x5555555555555555ULL) << 1 |
3067 ((Val >> 1) & 0x5555555555555555ULL);
3068 Val |= (Val & 0x3333333333333333ULL) << 2 |
3069 ((Val >> 2) & 0x3333333333333333ULL);
3070 return ConstantInt::get(Ty, V: Val);
3071 }
3072
3073 case Intrinsic::amdgcn_s_quadmask: {
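      // Produce one result bit per 4-bit group of the input: bit I of the
      // result is set if any bit in group I of the input is set.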
3074 uint64_t Val = Op->getZExtValue();
3075 uint64_t QuadMask = 0;
3076 for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {
3077 if (!(Val & 0xF))
3078 continue;
3079
3080 QuadMask |= (1ULL << I);
3081 }
3082 return ConstantInt::get(Ty, V: QuadMask);
3083 }
3084
3085 case Intrinsic::amdgcn_s_bitreplicate: {
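      // Spread the low 32 bits of the input apart so that input bit I lands at
      // result bit 2*I, then OR the value with itself shifted left by one so
      // each input bit occupies the pair of result bits 2*I and 2*I+1.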
3086 uint64_t Val = Op->getZExtValue();
3087 Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;
3088 Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8;
3089 Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4;
3090 Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2;
3091 Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;
3092 Val = Val | Val << 1;
3093 return ConstantInt::get(Ty, V: Val);
3094 }
3095 }
3096 }
3097
3098 if (Operands[0]->getType()->isVectorTy()) {
3099 auto *Op = cast<Constant>(Val: Operands[0]);
3100 switch (IntrinsicID) {
3101 default: break;
3102 case Intrinsic::vector_reduce_add:
3103 case Intrinsic::vector_reduce_mul:
3104 case Intrinsic::vector_reduce_and:
3105 case Intrinsic::vector_reduce_or:
3106 case Intrinsic::vector_reduce_xor:
3107 case Intrinsic::vector_reduce_smin:
3108 case Intrinsic::vector_reduce_smax:
3109 case Intrinsic::vector_reduce_umin:
3110 case Intrinsic::vector_reduce_umax:
3111 if (Constant *C = constantFoldVectorReduce(IID: IntrinsicID, Op: Operands[0]))
3112 return C;
3113 break;
3114 case Intrinsic::x86_sse_cvtss2si:
3115 case Intrinsic::x86_sse_cvtss2si64:
3116 case Intrinsic::x86_sse2_cvtsd2si:
3117 case Intrinsic::x86_sse2_cvtsd2si64:
3118 if (ConstantFP *FPOp =
3119 dyn_cast_or_null<ConstantFP>(Val: Op->getAggregateElement(Elt: 0U)))
3120 return ConstantFoldSSEConvertToInt(Val: FPOp->getValueAPF(),
3121 /*roundTowardZero=*/false, Ty,
3122 /*IsSigned*/true);
3123 break;
3124 case Intrinsic::x86_sse_cvttss2si:
3125 case Intrinsic::x86_sse_cvttss2si64:
3126 case Intrinsic::x86_sse2_cvttsd2si:
3127 case Intrinsic::x86_sse2_cvttsd2si64:
3128 if (ConstantFP *FPOp =
3129 dyn_cast_or_null<ConstantFP>(Val: Op->getAggregateElement(Elt: 0U)))
3130 return ConstantFoldSSEConvertToInt(Val: FPOp->getValueAPF(),
3131 /*roundTowardZero=*/true, Ty,
3132 /*IsSigned*/true);
3133 break;
3134
3135 case Intrinsic::wasm_anytrue:
3136 return Op->isZeroValue() ? ConstantInt::get(Ty, V: 0)
3137 : ConstantInt::get(Ty, V: 1);
3138
3139 case Intrinsic::wasm_alltrue:
3140      // Check each element individually.
3141 unsigned E = cast<FixedVectorType>(Val: Op->getType())->getNumElements();
3142 for (unsigned I = 0; I != E; ++I) {
3143 Constant *Elt = Op->getAggregateElement(Elt: I);
3144 // Return false as soon as we find a non-true element.
3145 if (Elt && Elt->isZeroValue())
3146 return ConstantInt::get(Ty, V: 0);
3147 // Bail as soon as we find an element we cannot prove to be true.
3148 if (!Elt || !isa<ConstantInt>(Val: Elt))
3149 return nullptr;
3150 }
3151
3152 return ConstantInt::get(Ty, V: 1);
3153 }
3154 }
3155
3156 return nullptr;
3157}
3158
3159static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,
3160 const ConstrainedFPIntrinsic *Call) {
3161 APFloat::opStatus St = APFloat::opOK;
3162 auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Val: Call);
3163 FCmpInst::Predicate Cond = FCmp->getPredicate();
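  // Signaling compares (fcmps) raise an invalid-operation exception if either
  // operand is a NaN; quiet compares (fcmp) raise it only for signaling NaNs.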
3164 if (FCmp->isSignaling()) {
3165 if (Op1.isNaN() || Op2.isNaN())
3166 St = APFloat::opInvalidOp;
3167 } else {
3168 if (Op1.isSignaling() || Op2.isSignaling())
3169 St = APFloat::opInvalidOp;
3170 }
3171 bool Result = FCmpInst::compare(LHS: Op1, RHS: Op2, Pred: Cond);
3172 if (mayFoldConstrained(CI: const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St))
3173 return ConstantInt::get(Ty: Call->getType()->getScalarType(), V: Result);
3174 return nullptr;
3175}
3176
3177static Constant *ConstantFoldLibCall2(StringRef Name, Type *Ty,
3178 ArrayRef<Constant *> Operands,
3179 const TargetLibraryInfo *TLI) {
3180 if (!TLI)
3181 return nullptr;
3182
3183 LibFunc Func = NotLibFunc;
3184 if (!TLI->getLibFunc(funcName: Name, F&: Func))
3185 return nullptr;
3186
3187 const auto *Op1 = dyn_cast<ConstantFP>(Val: Operands[0]);
3188 if (!Op1)
3189 return nullptr;
3190
3191 const auto *Op2 = dyn_cast<ConstantFP>(Val: Operands[1]);
3192 if (!Op2)
3193 return nullptr;
3194
3195 const APFloat &Op1V = Op1->getValueAPF();
3196 const APFloat &Op2V = Op2->getValueAPF();
3197
3198 switch (Func) {
3199 default:
3200 break;
3201 case LibFunc_pow:
3202 case LibFunc_powf:
3203 case LibFunc_pow_finite:
3204 case LibFunc_powf_finite:
3205 if (TLI->has(F: Func))
3206 return ConstantFoldBinaryFP(NativeFP: pow, V: Op1V, W: Op2V, Ty);
3207 break;
3208 case LibFunc_fmod:
3209 case LibFunc_fmodf:
3210 if (TLI->has(F: Func)) {
3211 APFloat V = Op1->getValueAPF();
3212 if (APFloat::opStatus::opOK == V.mod(RHS: Op2->getValueAPF()))
3213 return ConstantFP::get(Ty, V);
3214 }
3215 break;
3216 case LibFunc_remainder:
3217 case LibFunc_remainderf:
3218 if (TLI->has(F: Func)) {
3219 APFloat V = Op1->getValueAPF();
3220 if (APFloat::opStatus::opOK == V.remainder(RHS: Op2->getValueAPF()))
3221 return ConstantFP::get(Ty, V);
3222 }
3223 break;
3224 case LibFunc_atan2:
3225 case LibFunc_atan2f:
3226 // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm
3227 // (Solaris), so we do not assume a known result for that.
3228 if (Op1V.isZero() && Op2V.isZero())
3229 return nullptr;
3230 [[fallthrough]];
3231 case LibFunc_atan2_finite:
3232 case LibFunc_atan2f_finite:
3233 if (TLI->has(F: Func))
3234 return ConstantFoldBinaryFP(NativeFP: atan2, V: Op1V, W: Op2V, Ty);
3235 break;
3236 }
3237
3238 return nullptr;
3239}
3240
3241static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
3242 ArrayRef<Constant *> Operands,
3243 const CallBase *Call) {
3244 assert(Operands.size() == 2 && "Wrong number of operands.");
3245
3246 if (Ty->isFloatingPointTy()) {
3247    // TODO: We should have undef handling for all of the FP intrinsics that
3248    // this function attempts to fold.
3249 bool IsOp0Undef = isa<UndefValue>(Val: Operands[0]);
3250 bool IsOp1Undef = isa<UndefValue>(Val: Operands[1]);
3251 switch (IntrinsicID) {
3252 case Intrinsic::maxnum:
3253 case Intrinsic::minnum:
3254 case Intrinsic::maximum:
3255 case Intrinsic::minimum:
3256 case Intrinsic::maximumnum:
3257 case Intrinsic::minimumnum:
3258 case Intrinsic::nvvm_fmax_d:
3259 case Intrinsic::nvvm_fmin_d:
3260 // If one argument is undef, return the other argument.
3261 if (IsOp0Undef)
3262 return Operands[1];
3263 if (IsOp1Undef)
3264 return Operands[0];
3265 break;
3266
3267 case Intrinsic::nvvm_fmax_f:
3268 case Intrinsic::nvvm_fmax_ftz_f:
3269 case Intrinsic::nvvm_fmax_ftz_nan_f:
3270 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3271 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3272 case Intrinsic::nvvm_fmax_nan_f:
3273 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3274 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3275
3276 case Intrinsic::nvvm_fmin_f:
3277 case Intrinsic::nvvm_fmin_ftz_f:
3278 case Intrinsic::nvvm_fmin_ftz_nan_f:
3279 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
3280 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
3281 case Intrinsic::nvvm_fmin_nan_f:
3282 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
3283 case Intrinsic::nvvm_fmin_xorsign_abs_f:
3284 // If one arg is undef, the other arg can be returned only if it is
3285 // constant, as we may need to flush it to sign-preserving zero or
3286 // canonicalize the NaN.
3287 if (!IsOp0Undef && !IsOp1Undef)
3288 break;
3289 if (auto *Op = dyn_cast<ConstantFP>(Val: Operands[IsOp0Undef ? 1 : 0])) {
3290 if (Op->isNaN()) {
3291 APInt NVCanonicalNaN(32, 0x7fffffff);
3292 return ConstantFP::get(
3293 Ty, V: APFloat(Ty->getFltSemantics(), NVCanonicalNaN));
3294 }
3295 if (nvvm::FMinFMaxShouldFTZ(IntrinsicID))
3296 return ConstantFP::get(Ty, V: FTZPreserveSign(V: Op->getValueAPF()));
3297 else
3298 return Op;
3299 }
3300 break;
3301 }
3302 }
3303
3304 if (const auto *Op1 = dyn_cast<ConstantFP>(Val: Operands[0])) {
3305 const APFloat &Op1V = Op1->getValueAPF();
3306
3307 if (const auto *Op2 = dyn_cast<ConstantFP>(Val: Operands[1])) {
3308 if (Op2->getType() != Op1->getType())
3309 return nullptr;
3310 const APFloat &Op2V = Op2->getValueAPF();
3311
3312 if (const auto *ConstrIntr =
3313 dyn_cast_if_present<ConstrainedFPIntrinsic>(Val: Call)) {
3314 RoundingMode RM = getEvaluationRoundingMode(CI: ConstrIntr);
3315 APFloat Res = Op1V;
3316 APFloat::opStatus St;
3317 switch (IntrinsicID) {
3318 default:
3319 return nullptr;
3320 case Intrinsic::experimental_constrained_fadd:
3321 St = Res.add(RHS: Op2V, RM);
3322 break;
3323 case Intrinsic::experimental_constrained_fsub:
3324 St = Res.subtract(RHS: Op2V, RM);
3325 break;
3326 case Intrinsic::experimental_constrained_fmul:
3327 St = Res.multiply(RHS: Op2V, RM);
3328 break;
3329 case Intrinsic::experimental_constrained_fdiv:
3330 St = Res.divide(RHS: Op2V, RM);
3331 break;
3332 case Intrinsic::experimental_constrained_frem:
3333 St = Res.mod(RHS: Op2V);
3334 break;
3335 case Intrinsic::experimental_constrained_fcmp:
3336 case Intrinsic::experimental_constrained_fcmps:
3337 return evaluateCompare(Op1: Op1V, Op2: Op2V, Call: ConstrIntr);
3338 }
3339 if (mayFoldConstrained(CI: const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
3340 St))
3341 return ConstantFP::get(Ty, V: Res);
3342 return nullptr;
3343 }
3344
3345 switch (IntrinsicID) {
3346 default:
3347 break;
3348 case Intrinsic::copysign:
3349 return ConstantFP::get(Ty, V: APFloat::copySign(Value: Op1V, Sign: Op2V));
3350 case Intrinsic::minnum:
3351 if (Op1V.isSignaling() || Op2V.isSignaling())
3352 return nullptr;
3353 return ConstantFP::get(Ty, V: minnum(A: Op1V, B: Op2V));
3354 case Intrinsic::maxnum:
3355 if (Op1V.isSignaling() || Op2V.isSignaling())
3356 return nullptr;
3357 return ConstantFP::get(Ty, V: maxnum(A: Op1V, B: Op2V));
3358 case Intrinsic::minimum:
3359 return ConstantFP::get(Ty, V: minimum(A: Op1V, B: Op2V));
3360 case Intrinsic::maximum:
3361 return ConstantFP::get(Ty, V: maximum(A: Op1V, B: Op2V));
3362 case Intrinsic::minimumnum:
3363 return ConstantFP::get(Ty, V: minimumnum(A: Op1V, B: Op2V));
3364 case Intrinsic::maximumnum:
3365 return ConstantFP::get(Ty, V: maximumnum(A: Op1V, B: Op2V));
3366
3367 case Intrinsic::nvvm_fmax_d:
3368 case Intrinsic::nvvm_fmax_f:
3369 case Intrinsic::nvvm_fmax_ftz_f:
3370 case Intrinsic::nvvm_fmax_ftz_nan_f:
3371 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3372 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3373 case Intrinsic::nvvm_fmax_nan_f:
3374 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3375 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3376
3377 case Intrinsic::nvvm_fmin_d:
3378 case Intrinsic::nvvm_fmin_f:
3379 case Intrinsic::nvvm_fmin_ftz_f:
3380 case Intrinsic::nvvm_fmin_ftz_nan_f:
3381 case Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f:
3382 case Intrinsic::nvvm_fmin_ftz_xorsign_abs_f:
3383 case Intrinsic::nvvm_fmin_nan_f:
3384 case Intrinsic::nvvm_fmin_nan_xorsign_abs_f:
3385 case Intrinsic::nvvm_fmin_xorsign_abs_f: {
3386
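      // Decode the behavior encoded in the intrinsic name: the f32 variants
      // canonicalize NaN results, .ftz variants flush subnormal inputs to
      // sign-preserving zero, .nan variants propagate NaNs, and .xorsign.abs
      // variants compare |A| and |B| and apply the XOR of the original sign
      // bits to the result.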
3387 bool ShouldCanonicalizeNaNs = !(IntrinsicID == Intrinsic::nvvm_fmax_d ||
3388 IntrinsicID == Intrinsic::nvvm_fmin_d);
3389 bool IsFTZ = nvvm::FMinFMaxShouldFTZ(IntrinsicID);
3390 bool IsNaNPropagating = nvvm::FMinFMaxPropagatesNaNs(IntrinsicID);
3391 bool IsXorSignAbs = nvvm::FMinFMaxIsXorSignAbs(IntrinsicID);
3392
3393 APFloat A = IsFTZ ? FTZPreserveSign(V: Op1V) : Op1V;
3394 APFloat B = IsFTZ ? FTZPreserveSign(V: Op2V) : Op2V;
3395
3396 bool XorSign = false;
3397 if (IsXorSignAbs) {
3398 XorSign = A.isNegative() ^ B.isNegative();
3399 A = abs(X: A);
3400 B = abs(X: B);
3401 }
3402
3403 bool IsFMax = false;
3404 switch (IntrinsicID) {
3405 case Intrinsic::nvvm_fmax_d:
3406 case Intrinsic::nvvm_fmax_f:
3407 case Intrinsic::nvvm_fmax_ftz_f:
3408 case Intrinsic::nvvm_fmax_ftz_nan_f:
3409 case Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f:
3410 case Intrinsic::nvvm_fmax_ftz_xorsign_abs_f:
3411 case Intrinsic::nvvm_fmax_nan_f:
3412 case Intrinsic::nvvm_fmax_nan_xorsign_abs_f:
3413 case Intrinsic::nvvm_fmax_xorsign_abs_f:
3414 IsFMax = true;
3415 break;
3416 }
3417 APFloat Res = IsFMax ? maximum(A, B) : minimum(A, B);
3418
3419 if (ShouldCanonicalizeNaNs) {
3420 APFloat NVCanonicalNaN(Res.getSemantics(), APInt(32, 0x7fffffff));
3421 if (A.isNaN() && B.isNaN())
3422 return ConstantFP::get(Ty, V: NVCanonicalNaN);
3423 else if (IsNaNPropagating && (A.isNaN() || B.isNaN()))
3424 return ConstantFP::get(Ty, V: NVCanonicalNaN);
3425 }
3426
3427 if (A.isNaN() && B.isNaN())
3428 return Operands[1];
3429 else if (A.isNaN())
3430 Res = B;
3431 else if (B.isNaN())
3432 Res = A;
3433
3434 if (IsXorSignAbs && XorSign != Res.isNegative())
3435 Res.changeSign();
3436
3437 return ConstantFP::get(Ty, V: Res);
3438 }
3439
3440 case Intrinsic::nvvm_add_rm_f:
3441 case Intrinsic::nvvm_add_rn_f:
3442 case Intrinsic::nvvm_add_rp_f:
3443 case Intrinsic::nvvm_add_rz_f:
3444 case Intrinsic::nvvm_add_rm_d:
3445 case Intrinsic::nvvm_add_rn_d:
3446 case Intrinsic::nvvm_add_rp_d:
3447 case Intrinsic::nvvm_add_rz_d:
3448 case Intrinsic::nvvm_add_rm_ftz_f:
3449 case Intrinsic::nvvm_add_rn_ftz_f:
3450 case Intrinsic::nvvm_add_rp_ftz_f:
3451 case Intrinsic::nvvm_add_rz_ftz_f: {
3452
3453 bool IsFTZ = nvvm::FAddShouldFTZ(IntrinsicID);
3454 APFloat A = IsFTZ ? FTZPreserveSign(V: Op1V) : Op1V;
3455 APFloat B = IsFTZ ? FTZPreserveSign(V: Op2V) : Op2V;
3456
3457 APFloat::roundingMode RoundMode =
3458 nvvm::GetFAddRoundingMode(IntrinsicID);
3459
3460 APFloat Res = A;
3461 APFloat::opStatus Status = Res.add(RHS: B, RM: RoundMode);
3462
3463 if (!Res.isNaN() &&
3464 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3465 Res = IsFTZ ? FTZPreserveSign(V: Res) : Res;
3466 return ConstantFP::get(Ty, V: Res);
3467 }
3468 return nullptr;
3469 }
3470
3471 case Intrinsic::nvvm_mul_rm_f:
3472 case Intrinsic::nvvm_mul_rn_f:
3473 case Intrinsic::nvvm_mul_rp_f:
3474 case Intrinsic::nvvm_mul_rz_f:
3475 case Intrinsic::nvvm_mul_rm_d:
3476 case Intrinsic::nvvm_mul_rn_d:
3477 case Intrinsic::nvvm_mul_rp_d:
3478 case Intrinsic::nvvm_mul_rz_d:
3479 case Intrinsic::nvvm_mul_rm_ftz_f:
3480 case Intrinsic::nvvm_mul_rn_ftz_f:
3481 case Intrinsic::nvvm_mul_rp_ftz_f:
3482 case Intrinsic::nvvm_mul_rz_ftz_f: {
3483
3484 bool IsFTZ = nvvm::FMulShouldFTZ(IntrinsicID);
3485 APFloat A = IsFTZ ? FTZPreserveSign(V: Op1V) : Op1V;
3486 APFloat B = IsFTZ ? FTZPreserveSign(V: Op2V) : Op2V;
3487
3488 APFloat::roundingMode RoundMode =
3489 nvvm::GetFMulRoundingMode(IntrinsicID);
3490
3491 APFloat Res = A;
3492 APFloat::opStatus Status = Res.multiply(RHS: B, RM: RoundMode);
3493
3494 if (!Res.isNaN() &&
3495 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3496 Res = IsFTZ ? FTZPreserveSign(V: Res) : Res;
3497 return ConstantFP::get(Ty, V: Res);
3498 }
3499 return nullptr;
3500 }
3501
3502 case Intrinsic::nvvm_div_rm_f:
3503 case Intrinsic::nvvm_div_rn_f:
3504 case Intrinsic::nvvm_div_rp_f:
3505 case Intrinsic::nvvm_div_rz_f:
3506 case Intrinsic::nvvm_div_rm_d:
3507 case Intrinsic::nvvm_div_rn_d:
3508 case Intrinsic::nvvm_div_rp_d:
3509 case Intrinsic::nvvm_div_rz_d:
3510 case Intrinsic::nvvm_div_rm_ftz_f:
3511 case Intrinsic::nvvm_div_rn_ftz_f:
3512 case Intrinsic::nvvm_div_rp_ftz_f:
3513 case Intrinsic::nvvm_div_rz_ftz_f: {
3514 bool IsFTZ = nvvm::FDivShouldFTZ(IntrinsicID);
3515 APFloat A = IsFTZ ? FTZPreserveSign(V: Op1V) : Op1V;
3516 APFloat B = IsFTZ ? FTZPreserveSign(V: Op2V) : Op2V;
3517 APFloat::roundingMode RoundMode =
3518 nvvm::GetFDivRoundingMode(IntrinsicID);
3519
3520 APFloat Res = A;
3521 APFloat::opStatus Status = Res.divide(RHS: B, RM: RoundMode);
3522 if (!Res.isNaN() &&
3523 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3524 Res = IsFTZ ? FTZPreserveSign(V: Res) : Res;
3525 return ConstantFP::get(Ty, V: Res);
3526 }
3527 return nullptr;
3528 }
3529 }
3530
3531 if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
3532 return nullptr;
3533
3534 switch (IntrinsicID) {
3535 default:
3536 break;
3537 case Intrinsic::pow:
3538 return ConstantFoldBinaryFP(NativeFP: pow, V: Op1V, W: Op2V, Ty);
3539 case Intrinsic::amdgcn_fmul_legacy:
3540 // The legacy behaviour is that multiplying +/- 0.0 by anything, even
3541 // NaN or infinity, gives +0.0.
3542 if (Op1V.isZero() || Op2V.isZero())
3543 return ConstantFP::getZero(Ty);
3544 return ConstantFP::get(Ty, V: Op1V * Op2V);
3545 }
3546
3547 } else if (auto *Op2C = dyn_cast<ConstantInt>(Val: Operands[1])) {
3548 switch (IntrinsicID) {
3549 case Intrinsic::ldexp: {
3550 return ConstantFP::get(
3551 Context&: Ty->getContext(),
3552 V: scalbn(X: Op1V, Exp: Op2C->getSExtValue(), RM: APFloat::rmNearestTiesToEven));
3553 }
3554 case Intrinsic::is_fpclass: {
3555 FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue());
3556 bool Result =
3557 ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) ||
3558 ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) ||
3559 ((Mask & fcNegInf) && Op1V.isNegInfinity()) ||
3560 ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) ||
3561 ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) ||
3562 ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) ||
3563 ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) ||
3564 ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) ||
3565 ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) ||
3566 ((Mask & fcPosInf) && Op1V.isPosInfinity());
3567 return ConstantInt::get(Ty, V: Result);
3568 }
3569 case Intrinsic::powi: {
3570 int Exp = static_cast<int>(Op2C->getSExtValue());
3571 switch (Ty->getTypeID()) {
3572 case Type::HalfTyID:
3573 case Type::FloatTyID: {
3574 APFloat Res(static_cast<float>(std::pow(x: Op1V.convertToFloat(), y: Exp)));
3575 if (Ty->isHalfTy()) {
3576 bool Unused;
3577 Res.convert(ToSemantics: APFloat::IEEEhalf(), RM: APFloat::rmNearestTiesToEven,
3578 losesInfo: &Unused);
3579 }
3580 return ConstantFP::get(Ty, V: Res);
3581 }
3582 case Type::DoubleTyID:
3583 return ConstantFP::get(Ty, V: std::pow(x: Op1V.convertToDouble(), y: Exp));
3584 default:
3585 return nullptr;
3586 }
3587 }
3588 default:
3589 break;
3590 }
3591 }
3592 return nullptr;
3593 }
3594
3595 if (Operands[0]->getType()->isIntegerTy() &&
3596 Operands[1]->getType()->isIntegerTy()) {
3597 const APInt *C0, *C1;
3598 if (!getConstIntOrUndef(Op: Operands[0], C&: C0) ||
3599 !getConstIntOrUndef(Op: Operands[1], C&: C1))
3600 return nullptr;
3601
3602 switch (IntrinsicID) {
3603 default: break;
3604 case Intrinsic::smax:
3605 case Intrinsic::smin:
3606 case Intrinsic::umax:
3607 case Intrinsic::umin:
3608 if (!C0 && !C1)
3609 return UndefValue::get(T: Ty);
3610 if (!C0 || !C1)
3611 return MinMaxIntrinsic::getSaturationPoint(ID: IntrinsicID, Ty);
3612 return ConstantInt::get(
3613 Ty, V: ICmpInst::compare(LHS: *C0, RHS: *C1,
3614 Pred: MinMaxIntrinsic::getPredicate(ID: IntrinsicID))
3615 ? *C0
3616 : *C1);
3617
3618 case Intrinsic::scmp:
3619 case Intrinsic::ucmp:
3620 if (!C0 || !C1)
3621 return ConstantInt::get(Ty, V: 0);
3622
3623 int Res;
3624 if (IntrinsicID == Intrinsic::scmp)
3625 Res = C0->sgt(RHS: *C1) ? 1 : C0->slt(RHS: *C1) ? -1 : 0;
3626 else
3627 Res = C0->ugt(RHS: *C1) ? 1 : C0->ult(RHS: *C1) ? -1 : 0;
3628 return ConstantInt::get(Ty, V: Res, /*IsSigned=*/true);
3629
3630 case Intrinsic::usub_with_overflow:
3631 case Intrinsic::ssub_with_overflow:
3632 // X - undef -> { 0, false }
3633 // undef - X -> { 0, false }
3634 if (!C0 || !C1)
3635 return Constant::getNullValue(Ty);
3636 [[fallthrough]];
3637 case Intrinsic::uadd_with_overflow:
3638 case Intrinsic::sadd_with_overflow:
3639 // X + undef -> { -1, false }
3640      // undef + X -> { -1, false }
3641 if (!C0 || !C1) {
3642 return ConstantStruct::get(
3643 T: cast<StructType>(Val: Ty),
3644 V: {Constant::getAllOnesValue(Ty: Ty->getStructElementType(N: 0)),
3645 Constant::getNullValue(Ty: Ty->getStructElementType(N: 1))});
3646 }
3647 [[fallthrough]];
3648 case Intrinsic::smul_with_overflow:
3649 case Intrinsic::umul_with_overflow: {
3650 // undef * X -> { 0, false }
3651 // X * undef -> { 0, false }
3652 if (!C0 || !C1)
3653 return Constant::getNullValue(Ty);
3654
3655 APInt Res;
3656 bool Overflow;
3657 switch (IntrinsicID) {
3658 default: llvm_unreachable("Invalid case");
3659 case Intrinsic::sadd_with_overflow:
3660 Res = C0->sadd_ov(RHS: *C1, Overflow);
3661 break;
3662 case Intrinsic::uadd_with_overflow:
3663 Res = C0->uadd_ov(RHS: *C1, Overflow);
3664 break;
3665 case Intrinsic::ssub_with_overflow:
3666 Res = C0->ssub_ov(RHS: *C1, Overflow);
3667 break;
3668 case Intrinsic::usub_with_overflow:
3669 Res = C0->usub_ov(RHS: *C1, Overflow);
3670 break;
3671 case Intrinsic::smul_with_overflow:
3672 Res = C0->smul_ov(RHS: *C1, Overflow);
3673 break;
3674 case Intrinsic::umul_with_overflow:
3675 Res = C0->umul_ov(RHS: *C1, Overflow);
3676 break;
3677 }
3678 Constant *Ops[] = {
3679 ConstantInt::get(Context&: Ty->getContext(), V: Res),
3680 ConstantInt::get(Ty: Type::getInt1Ty(C&: Ty->getContext()), V: Overflow)
3681 };
3682 return ConstantStruct::get(T: cast<StructType>(Val: Ty), V: Ops);
3683 }
3684 case Intrinsic::uadd_sat:
3685 case Intrinsic::sadd_sat:
3686 if (!C0 && !C1)
3687 return UndefValue::get(T: Ty);
3688 if (!C0 || !C1)
3689 return Constant::getAllOnesValue(Ty);
3690 if (IntrinsicID == Intrinsic::uadd_sat)
3691 return ConstantInt::get(Ty, V: C0->uadd_sat(RHS: *C1));
3692 else
3693 return ConstantInt::get(Ty, V: C0->sadd_sat(RHS: *C1));
3694 case Intrinsic::usub_sat:
3695 case Intrinsic::ssub_sat:
3696 if (!C0 && !C1)
3697 return UndefValue::get(T: Ty);
3698 if (!C0 || !C1)
3699 return Constant::getNullValue(Ty);
3700 if (IntrinsicID == Intrinsic::usub_sat)
3701 return ConstantInt::get(Ty, V: C0->usub_sat(RHS: *C1));
3702 else
3703 return ConstantInt::get(Ty, V: C0->ssub_sat(RHS: *C1));
3704 case Intrinsic::cttz:
3705 case Intrinsic::ctlz:
3706 assert(C1 && "Must be constant int");
3707
3708 // cttz(0, 1) and ctlz(0, 1) are poison.
3709 if (C1->isOne() && (!C0 || C0->isZero()))
3710 return PoisonValue::get(T: Ty);
3711 if (!C0)
3712 return Constant::getNullValue(Ty);
3713 if (IntrinsicID == Intrinsic::cttz)
3714 return ConstantInt::get(Ty, V: C0->countr_zero());
3715 else
3716 return ConstantInt::get(Ty, V: C0->countl_zero());
3717
3718 case Intrinsic::abs:
3719 assert(C1 && "Must be constant int");
3720 assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");
3721
3722 // Undef or minimum val operand with poison min --> poison
3723 if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
3724 return PoisonValue::get(T: Ty);
3725
3726 // Undef operand with no poison min --> 0 (sign bit must be clear)
3727 if (!C0)
3728 return Constant::getNullValue(Ty);
3729
3730 return ConstantInt::get(Ty, V: C0->abs());
3731 case Intrinsic::amdgcn_wave_reduce_umin:
3732 case Intrinsic::amdgcn_wave_reduce_umax:
3733 case Intrinsic::amdgcn_wave_reduce_max:
3734 case Intrinsic::amdgcn_wave_reduce_min:
3735 case Intrinsic::amdgcn_wave_reduce_add:
3736 case Intrinsic::amdgcn_wave_reduce_sub:
3737 case Intrinsic::amdgcn_wave_reduce_and:
3738 case Intrinsic::amdgcn_wave_reduce_or:
3739 case Intrinsic::amdgcn_wave_reduce_xor:
3740 return dyn_cast<Constant>(Val: Operands[0]);
3741 }
3742
3743 return nullptr;
3744 }
3745
3746  // Support ConstantVector in case we have an Undef in the upper lanes.
3747 if ((isa<ConstantVector>(Val: Operands[0]) ||
3748 isa<ConstantDataVector>(Val: Operands[0])) &&
3749 // Check for default rounding mode.
3750 // FIXME: Support other rounding modes?
3751 isa<ConstantInt>(Val: Operands[1]) &&
3752 cast<ConstantInt>(Val: Operands[1])->getValue() == 4) {
3753 auto *Op = cast<Constant>(Val: Operands[0]);
3754 switch (IntrinsicID) {
3755 default: break;
3756 case Intrinsic::x86_avx512_vcvtss2si32:
3757 case Intrinsic::x86_avx512_vcvtss2si64:
3758 case Intrinsic::x86_avx512_vcvtsd2si32:
3759 case Intrinsic::x86_avx512_vcvtsd2si64:
3760 if (ConstantFP *FPOp =
3761 dyn_cast_or_null<ConstantFP>(Val: Op->getAggregateElement(Elt: 0U)))
3762 return ConstantFoldSSEConvertToInt(Val: FPOp->getValueAPF(),
3763 /*roundTowardZero=*/false, Ty,
3764 /*IsSigned*/true);
3765 break;
3766 case Intrinsic::x86_avx512_vcvtss2usi32:
3767 case Intrinsic::x86_avx512_vcvtss2usi64:
3768 case Intrinsic::x86_avx512_vcvtsd2usi32:
3769 case Intrinsic::x86_avx512_vcvtsd2usi64:
3770 if (ConstantFP *FPOp =
3771 dyn_cast_or_null<ConstantFP>(Val: Op->getAggregateElement(Elt: 0U)))
3772 return ConstantFoldSSEConvertToInt(Val: FPOp->getValueAPF(),
3773 /*roundTowardZero=*/false, Ty,
3774 /*IsSigned*/false);
3775 break;
3776 case Intrinsic::x86_avx512_cvttss2si:
3777 case Intrinsic::x86_avx512_cvttss2si64:
3778 case Intrinsic::x86_avx512_cvttsd2si:
3779 case Intrinsic::x86_avx512_cvttsd2si64:
3780 if (ConstantFP *FPOp =
3781 dyn_cast_or_null<ConstantFP>(Val: Op->getAggregateElement(Elt: 0U)))
3782 return ConstantFoldSSEConvertToInt(Val: FPOp->getValueAPF(),
3783 /*roundTowardZero=*/true, Ty,
3784 /*IsSigned*/true);
3785 break;
3786 case Intrinsic::x86_avx512_cvttss2usi:
3787 case Intrinsic::x86_avx512_cvttss2usi64:
3788 case Intrinsic::x86_avx512_cvttsd2usi:
3789 case Intrinsic::x86_avx512_cvttsd2usi64:
3790 if (ConstantFP *FPOp =
3791 dyn_cast_or_null<ConstantFP>(Val: Op->getAggregateElement(Elt: 0U)))
3792 return ConstantFoldSSEConvertToInt(Val: FPOp->getValueAPF(),
3793 /*roundTowardZero=*/true, Ty,
3794 /*IsSigned*/false);
3795 break;
3796 }
3797 }
3798 return nullptr;
3799}
3800
3801static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
3802 const APFloat &S0,
3803 const APFloat &S1,
3804 const APFloat &S2) {
3805 unsigned ID;
3806 const fltSemantics &Sem = S0.getSemantics();
3807 APFloat MA(Sem), SC(Sem), TC(Sem);
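  // Select the cube face from the coordinate with the largest magnitude:
  // ID is the face index (0..5), MA the major-axis coordinate (cubema folds
  // to 2 * MA), and SC/TC the face-local S/T coordinates.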
3808 if (abs(X: S2) >= abs(X: S0) && abs(X: S2) >= abs(X: S1)) {
3809 if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
3810 // S2 < 0
3811 ID = 5;
3812 SC = -S0;
3813 } else {
3814 ID = 4;
3815 SC = S0;
3816 }
3817 MA = S2;
3818 TC = -S1;
3819 } else if (abs(X: S1) >= abs(X: S0)) {
3820 if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
3821 // S1 < 0
3822 ID = 3;
3823 TC = -S2;
3824 } else {
3825 ID = 2;
3826 TC = S2;
3827 }
3828 MA = S1;
3829 SC = S0;
3830 } else {
3831 if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
3832 // S0 < 0
3833 ID = 1;
3834 SC = S2;
3835 } else {
3836 ID = 0;
3837 SC = -S2;
3838 }
3839 MA = S0;
3840 TC = -S1;
3841 }
3842 switch (IntrinsicID) {
3843 default:
3844 llvm_unreachable("unhandled amdgcn cube intrinsic");
3845 case Intrinsic::amdgcn_cubeid:
3846 return APFloat(Sem, ID);
3847 case Intrinsic::amdgcn_cubema:
3848 return MA + MA;
3849 case Intrinsic::amdgcn_cubesc:
3850 return SC;
3851 case Intrinsic::amdgcn_cubetc:
3852 return TC;
3853 }
3854}
3855
3856static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
3857 Type *Ty) {
3858 const APInt *C0, *C1, *C2;
3859 if (!getConstIntOrUndef(Op: Operands[0], C&: C0) ||
3860 !getConstIntOrUndef(Op: Operands[1], C&: C1) ||
3861 !getConstIntOrUndef(Op: Operands[2], C&: C2))
3862 return nullptr;
3863
3864 if (!C2)
3865 return UndefValue::get(T: Ty);
3866
3867 APInt Val(32, 0);
3868 unsigned NumUndefBytes = 0;
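  // Each selector byte of C2 chooses one result byte: values 0-3 take a byte
  // of C1, 4-7 a byte of C0, 8-11 replicate the sign bit of a 16-bit half of
  // C1 or C0, 12 yields 0x00, and 13 or above yields 0xff.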
3869 for (unsigned I = 0; I < 32; I += 8) {
3870 unsigned Sel = C2->extractBitsAsZExtValue(numBits: 8, bitPosition: I);
3871 unsigned B = 0;
3872
3873 if (Sel >= 13)
3874 B = 0xff;
3875 else if (Sel == 12)
3876 B = 0x00;
3877 else {
3878 const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
3879 if (!Src)
3880 ++NumUndefBytes;
3881 else if (Sel < 8)
3882 B = Src->extractBitsAsZExtValue(numBits: 8, bitPosition: (Sel & 3) * 8);
3883 else
3884 B = Src->extractBitsAsZExtValue(numBits: 1, bitPosition: (Sel & 1) ? 31 : 15) * 0xff;
3885 }
3886
3887 Val.insertBits(SubBits: B, bitPosition: I, numBits: 8);
3888 }
3889
3890 if (NumUndefBytes == 4)
3891 return UndefValue::get(T: Ty);
3892
3893 return ConstantInt::get(Ty, V: Val);
3894}
3895
3896static Constant *ConstantFoldScalarCall3(StringRef Name,
3897 Intrinsic::ID IntrinsicID,
3898 Type *Ty,
3899 ArrayRef<Constant *> Operands,
3900 const TargetLibraryInfo *TLI,
3901 const CallBase *Call) {
3902 assert(Operands.size() == 3 && "Wrong number of operands.");
3903
3904 if (const auto *Op1 = dyn_cast<ConstantFP>(Val: Operands[0])) {
3905 if (const auto *Op2 = dyn_cast<ConstantFP>(Val: Operands[1])) {
3906 if (const auto *Op3 = dyn_cast<ConstantFP>(Val: Operands[2])) {
3907 const APFloat &C1 = Op1->getValueAPF();
3908 const APFloat &C2 = Op2->getValueAPF();
3909 const APFloat &C3 = Op3->getValueAPF();
3910
3911 if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Val: Call)) {
3912 RoundingMode RM = getEvaluationRoundingMode(CI: ConstrIntr);
3913 APFloat Res = C1;
3914 APFloat::opStatus St;
3915 switch (IntrinsicID) {
3916 default:
3917 return nullptr;
3918 case Intrinsic::experimental_constrained_fma:
3919 case Intrinsic::experimental_constrained_fmuladd:
3920 St = Res.fusedMultiplyAdd(Multiplicand: C2, Addend: C3, RM);
3921 break;
3922 }
3923 if (mayFoldConstrained(
3924 CI: const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
3925 return ConstantFP::get(Ty, V: Res);
3926 return nullptr;
3927 }
3928
3929 switch (IntrinsicID) {
3930 default: break;
3931 case Intrinsic::amdgcn_fma_legacy: {
3932 // The legacy behaviour is that multiplying +/- 0.0 by anything, even
3933 // NaN or infinity, gives +0.0.
3934 if (C1.isZero() || C2.isZero()) {
3935          // It's tempting to just return C3 here, but that would give the
3936          // wrong result if C3 were -0.0.
3937 return ConstantFP::get(Ty, V: APFloat(0.0f) + C3);
3938 }
3939 [[fallthrough]];
3940 }
3941 case Intrinsic::fma:
3942 case Intrinsic::fmuladd: {
3943 APFloat V = C1;
3944 V.fusedMultiplyAdd(Multiplicand: C2, Addend: C3, RM: APFloat::rmNearestTiesToEven);
3945 return ConstantFP::get(Ty, V);
3946 }
3947
3948 case Intrinsic::nvvm_fma_rm_f:
3949 case Intrinsic::nvvm_fma_rn_f:
3950 case Intrinsic::nvvm_fma_rp_f:
3951 case Intrinsic::nvvm_fma_rz_f:
3952 case Intrinsic::nvvm_fma_rm_d:
3953 case Intrinsic::nvvm_fma_rn_d:
3954 case Intrinsic::nvvm_fma_rp_d:
3955 case Intrinsic::nvvm_fma_rz_d:
3956 case Intrinsic::nvvm_fma_rm_ftz_f:
3957 case Intrinsic::nvvm_fma_rn_ftz_f:
3958 case Intrinsic::nvvm_fma_rp_ftz_f:
3959 case Intrinsic::nvvm_fma_rz_ftz_f: {
3960 bool IsFTZ = nvvm::FMAShouldFTZ(IntrinsicID);
3961 APFloat A = IsFTZ ? FTZPreserveSign(V: C1) : C1;
3962 APFloat B = IsFTZ ? FTZPreserveSign(V: C2) : C2;
3963 APFloat C = IsFTZ ? FTZPreserveSign(V: C3) : C3;
3964
3965 APFloat::roundingMode RoundMode =
3966 nvvm::GetFMARoundingMode(IntrinsicID);
3967
3968 APFloat Res = A;
3969 APFloat::opStatus Status = Res.fusedMultiplyAdd(Multiplicand: B, Addend: C, RM: RoundMode);
3970
3971 if (!Res.isNaN() &&
3972 (Status == APFloat::opOK || Status == APFloat::opInexact)) {
3973 Res = IsFTZ ? FTZPreserveSign(V: Res) : Res;
3974 return ConstantFP::get(Ty, V: Res);
3975 }
3976 return nullptr;
3977 }
3978
3979 case Intrinsic::amdgcn_cubeid:
3980 case Intrinsic::amdgcn_cubema:
3981 case Intrinsic::amdgcn_cubesc:
3982 case Intrinsic::amdgcn_cubetc: {
3983 APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, S0: C1, S1: C2, S2: C3);
3984 return ConstantFP::get(Ty, V);
3985 }
3986 }
3987 }
3988 }
3989 }
3990
3991 if (IntrinsicID == Intrinsic::smul_fix ||
3992 IntrinsicID == Intrinsic::smul_fix_sat) {
3993 const APInt *C0, *C1;
3994 if (!getConstIntOrUndef(Op: Operands[0], C&: C0) ||
3995 !getConstIntOrUndef(Op: Operands[1], C&: C1))
3996 return nullptr;
3997
3998 // undef * C -> 0
3999 // C * undef -> 0
4000 if (!C0 || !C1)
4001 return Constant::getNullValue(Ty);
4002
4003 // This code performs rounding towards negative infinity in case the result
4004 // cannot be represented exactly for the given scale. Targets that do care
4005 // about rounding should use a target hook for specifying how rounding
4006 // should be done, and provide their own folding to be consistent with
4007 // rounding. This is the same approach as used by
4008 // DAGTypeLegalizer::ExpandIntRes_MULFIX.
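    // For example, with Width = 32 and Scale = 16 (Q16.16 fixed point),
    // smul_fix(0x00018000 /* 1.5 */, 0x00020000 /* 2.0 */, 16) folds to
    // 0x00030000 /* 3.0 */.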
4009 unsigned Scale = cast<ConstantInt>(Val: Operands[2])->getZExtValue();
4010 unsigned Width = C0->getBitWidth();
4011 assert(Scale < Width && "Illegal scale.");
4012 unsigned ExtendedWidth = Width * 2;
4013 APInt Product =
4014 (C0->sext(width: ExtendedWidth) * C1->sext(width: ExtendedWidth)).ashr(ShiftAmt: Scale);
4015 if (IntrinsicID == Intrinsic::smul_fix_sat) {
4016 APInt Max = APInt::getSignedMaxValue(numBits: Width).sext(width: ExtendedWidth);
4017 APInt Min = APInt::getSignedMinValue(numBits: Width).sext(width: ExtendedWidth);
4018 Product = APIntOps::smin(A: Product, B: Max);
4019 Product = APIntOps::smax(A: Product, B: Min);
4020 }
4021 return ConstantInt::get(Context&: Ty->getContext(), V: Product.sextOrTrunc(width: Width));
4022 }
4023
4024 if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
4025 const APInt *C0, *C1, *C2;
4026 if (!getConstIntOrUndef(Op: Operands[0], C&: C0) ||
4027 !getConstIntOrUndef(Op: Operands[1], C&: C1) ||
4028 !getConstIntOrUndef(Op: Operands[2], C&: C2))
4029 return nullptr;
4030
4031 bool IsRight = IntrinsicID == Intrinsic::fshr;
4032 if (!C2)
4033 return Operands[IsRight ? 1 : 0];
4034 if (!C0 && !C1)
4035 return UndefValue::get(T: Ty);
4036
4037 // The shift amount is interpreted as modulo the bitwidth. If the shift
4038 // amount is effectively 0, avoid UB due to oversized inverse shift below.
4039 unsigned BitWidth = C2->getBitWidth();
4040 unsigned ShAmt = C2->urem(RHS: BitWidth);
4041 if (!ShAmt)
4042 return Operands[IsRight ? 1 : 0];
4043
4044 // (C0 << ShlAmt) | (C1 >> LshrAmt)
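    // For example, fshl(i8 0xAB, i8 0xCD, 2) folds to 0xAF and
    // fshr(i8 0xAB, i8 0xCD, 2) folds to 0xF3.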
4045 unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
4046 unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
4047 if (!C0)
4048 return ConstantInt::get(Ty, V: C1->lshr(shiftAmt: LshrAmt));
4049 if (!C1)
4050 return ConstantInt::get(Ty, V: C0->shl(shiftAmt: ShlAmt));
4051 return ConstantInt::get(Ty, V: C0->shl(shiftAmt: ShlAmt) | C1->lshr(shiftAmt: LshrAmt));
4052 }
4053
4054 if (IntrinsicID == Intrinsic::amdgcn_perm)
4055 return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);
4056
4057 return nullptr;
4058}
4059
4060static Constant *ConstantFoldScalarCall(StringRef Name,
4061 Intrinsic::ID IntrinsicID,
4062 Type *Ty,
4063 ArrayRef<Constant *> Operands,
4064 const TargetLibraryInfo *TLI,
4065 const CallBase *Call) {
4066 if (IntrinsicID != Intrinsic::not_intrinsic &&
4067 any_of(Range&: Operands, P: IsaPred<PoisonValue>) &&
4068 intrinsicPropagatesPoison(IID: IntrinsicID))
4069 return PoisonValue::get(T: Ty);
4070
4071 if (Operands.size() == 1)
4072 return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);
4073
4074 if (Operands.size() == 2) {
4075 if (Constant *FoldedLibCall =
4076 ConstantFoldLibCall2(Name, Ty, Operands, TLI)) {
4077 return FoldedLibCall;
4078 }
4079 return ConstantFoldIntrinsicCall2(IntrinsicID, Ty, Operands, Call);
4080 }
4081
4082 if (Operands.size() == 3)
4083 return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);
4084
4085 return nullptr;
4086}
4087
4088static Constant *ConstantFoldFixedVectorCall(
4089 StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
4090 ArrayRef<Constant *> Operands, const DataLayout &DL,
4091 const TargetLibraryInfo *TLI, const CallBase *Call) {
4092 SmallVector<Constant *, 4> Result(FVTy->getNumElements());
4093 SmallVector<Constant *, 4> Lane(Operands.size());
4094 Type *Ty = FVTy->getElementType();
4095
4096 switch (IntrinsicID) {
4097 case Intrinsic::masked_load: {
4098 auto *SrcPtr = Operands[0];
4099 auto *Mask = Operands[1];
4100 auto *Passthru = Operands[2];
4101
4102 Constant *VecData = ConstantFoldLoadFromConstPtr(C: SrcPtr, Ty: FVTy, DL);
4103
4104 SmallVector<Constant *, 32> NewElements;
4105 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4106 auto *MaskElt = Mask->getAggregateElement(Elt: I);
4107 if (!MaskElt)
4108 break;
4109 auto *PassthruElt = Passthru->getAggregateElement(Elt: I);
4110 auto *VecElt = VecData ? VecData->getAggregateElement(Elt: I) : nullptr;
4111 if (isa<UndefValue>(Val: MaskElt)) {
4112 if (PassthruElt)
4113 NewElements.push_back(Elt: PassthruElt);
4114 else if (VecElt)
4115 NewElements.push_back(Elt: VecElt);
4116 else
4117 return nullptr;
4118 }
4119 if (MaskElt->isNullValue()) {
4120 if (!PassthruElt)
4121 return nullptr;
4122 NewElements.push_back(Elt: PassthruElt);
4123 } else if (MaskElt->isOneValue()) {
4124 if (!VecElt)
4125 return nullptr;
4126 NewElements.push_back(Elt: VecElt);
4127 } else {
4128 return nullptr;
4129 }
4130 }
4131 if (NewElements.size() != FVTy->getNumElements())
4132 return nullptr;
4133 return ConstantVector::get(V: NewElements);
4134 }
4135 case Intrinsic::arm_mve_vctp8:
4136 case Intrinsic::arm_mve_vctp16:
4137 case Intrinsic::arm_mve_vctp32:
4138 case Intrinsic::arm_mve_vctp64: {
4139 if (auto *Op = dyn_cast<ConstantInt>(Val: Operands[0])) {
4140 unsigned Lanes = FVTy->getNumElements();
4141 uint64_t Limit = Op->getZExtValue();
4142
4143 SmallVector<Constant *, 16> NCs;
4144 for (unsigned i = 0; i < Lanes; i++) {
4145 if (i < Limit)
4146 NCs.push_back(Elt: ConstantInt::getTrue(Ty));
4147 else
4148 NCs.push_back(Elt: ConstantInt::getFalse(Ty));
4149 }
4150 return ConstantVector::get(V: NCs);
4151 }
4152 return nullptr;
4153 }
4154 case Intrinsic::get_active_lane_mask: {
4155 auto *Op0 = dyn_cast<ConstantInt>(Val: Operands[0]);
4156 auto *Op1 = dyn_cast<ConstantInt>(Val: Operands[1]);
4157 if (Op0 && Op1) {
4158 unsigned Lanes = FVTy->getNumElements();
4159 uint64_t Base = Op0->getZExtValue();
4160 uint64_t Limit = Op1->getZExtValue();
4161
4162 SmallVector<Constant *, 16> NCs;
4163 for (unsigned i = 0; i < Lanes; i++) {
4164 if (Base + i < Limit)
4165 NCs.push_back(Elt: ConstantInt::getTrue(Ty));
4166 else
4167 NCs.push_back(Elt: ConstantInt::getFalse(Ty));
4168 }
4169 return ConstantVector::get(V: NCs);
4170 }
4171 return nullptr;
4172 }
4173 case Intrinsic::vector_extract: {
4174 auto *Idx = dyn_cast<ConstantInt>(Val: Operands[1]);
4175 Constant *Vec = Operands[0];
4176 if (!Idx || !isa<FixedVectorType>(Val: Vec->getType()))
4177 return nullptr;
4178
4179 unsigned NumElements = FVTy->getNumElements();
4180 unsigned VecNumElements =
4181 cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
4182 unsigned StartingIndex = Idx->getZExtValue();
4183
4184    // Extracting the entire vector is a no-op.
4185 if (NumElements == VecNumElements && StartingIndex == 0)
4186 return Vec;
4187
4188 for (unsigned I = StartingIndex, E = StartingIndex + NumElements; I < E;
4189 ++I) {
4190 Constant *Elt = Vec->getAggregateElement(Elt: I);
4191 if (!Elt)
4192 return nullptr;
4193 Result[I - StartingIndex] = Elt;
4194 }
4195
4196 return ConstantVector::get(V: Result);
4197 }
4198 case Intrinsic::vector_insert: {
4199 Constant *Vec = Operands[0];
4200 Constant *SubVec = Operands[1];
4201 auto *Idx = dyn_cast<ConstantInt>(Val: Operands[2]);
4202 if (!Idx || !isa<FixedVectorType>(Val: Vec->getType()))
4203 return nullptr;
4204
4205 unsigned SubVecNumElements =
4206 cast<FixedVectorType>(Val: SubVec->getType())->getNumElements();
4207 unsigned VecNumElements =
4208 cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
4209 unsigned IdxN = Idx->getZExtValue();
4210    // Replacing the entire vector with the subvector is a no-op.
4211 if (SubVecNumElements == VecNumElements && IdxN == 0)
4212 return SubVec;
4213
4214 for (unsigned I = 0; I < VecNumElements; ++I) {
4215 Constant *Elt;
4216 if (I < IdxN + SubVecNumElements)
4217 Elt = SubVec->getAggregateElement(Elt: I - IdxN);
4218 else
4219 Elt = Vec->getAggregateElement(Elt: I);
4220 if (!Elt)
4221 return nullptr;
4222 Result[I] = Elt;
4223 }
4224 return ConstantVector::get(V: Result);
4225 }
4226 case Intrinsic::vector_interleave2:
4227 case Intrinsic::vector_interleave3:
4228 case Intrinsic::vector_interleave4:
4229 case Intrinsic::vector_interleave5:
4230 case Intrinsic::vector_interleave6:
4231 case Intrinsic::vector_interleave7:
4232 case Intrinsic::vector_interleave8: {
4233 unsigned NumElements =
4234 cast<FixedVectorType>(Val: Operands[0]->getType())->getNumElements();
4235 unsigned NumOperands = Operands.size();
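    // Element I of operand J becomes element NumOperands * I + J of the
    // interleaved result.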
4236 for (unsigned I = 0; I < NumElements; ++I) {
4237 for (unsigned J = 0; J < NumOperands; ++J) {
4238 Constant *Elt = Operands[J]->getAggregateElement(Elt: I);
4239 if (!Elt)
4240 return nullptr;
4241 Result[NumOperands * I + J] = Elt;
4242 }
4243 }
4244 return ConstantVector::get(V: Result);
4245 }
4246 case Intrinsic::wasm_dot: {
4247 unsigned NumElements =
4248 cast<FixedVectorType>(Val: Operands[0]->getType())->getNumElements();
4249
4250 assert(NumElements == 8 && Result.size() == 4 &&
4251 "wasm dot takes i16x8 and produces i32x4");
4252 assert(Ty->isIntegerTy());
4253 int32_t MulVector[8];
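    // Multiply corresponding signed i16 lanes, then sum adjacent products
    // pairwise to form each i32 result lane.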
4254
4255 for (unsigned I = 0; I < NumElements; ++I) {
4256 ConstantInt *Elt0 =
4257 cast<ConstantInt>(Val: Operands[0]->getAggregateElement(Elt: I));
4258 ConstantInt *Elt1 =
4259 cast<ConstantInt>(Val: Operands[1]->getAggregateElement(Elt: I));
4260
4261 MulVector[I] = Elt0->getSExtValue() * Elt1->getSExtValue();
4262 }
4263 for (unsigned I = 0; I < Result.size(); I++) {
4264 int64_t IAdd = (int64_t)MulVector[I * 2] + (int64_t)MulVector[I * 2 + 1];
4265 Result[I] = ConstantInt::getSigned(Ty, V: IAdd, /*ImplicitTrunc=*/true);
4266 }
4267
4268 return ConstantVector::get(V: Result);
4269 }
4270 default:
4271 break;
4272 }
4273
4274 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4275 // Gather a column of constants.
4276 for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
4277 // Some intrinsics use a scalar type for certain arguments.
4278 if (isVectorIntrinsicWithScalarOpAtArg(ID: IntrinsicID, ScalarOpdIdx: J, /*TTI=*/nullptr)) {
4279 Lane[J] = Operands[J];
4280 continue;
4281 }
4282
4283 Constant *Agg = Operands[J]->getAggregateElement(Elt: I);
4284 if (!Agg)
4285 return nullptr;
4286
4287 Lane[J] = Agg;
4288 }
4289
4290 // Use the regular scalar folding to simplify this column.
4291 Constant *Folded =
4292 ConstantFoldScalarCall(Name, IntrinsicID, Ty, Operands: Lane, TLI, Call);
4293 if (!Folded)
4294 return nullptr;
4295 Result[I] = Folded;
4296 }
4297
4298 return ConstantVector::get(V: Result);
4299}
4300
4301static Constant *ConstantFoldScalableVectorCall(
4302 StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
4303 ArrayRef<Constant *> Operands, const DataLayout &DL,
4304 const TargetLibraryInfo *TLI, const CallBase *Call) {
4305 switch (IntrinsicID) {
4306 case Intrinsic::aarch64_sve_convert_from_svbool: {
4307 auto *Src = dyn_cast<Constant>(Val: Operands[0]);
4308 if (!Src || !Src->isNullValue())
4309 break;
4310
4311 return ConstantInt::getFalse(Ty: SVTy);
4312 }
4313 case Intrinsic::get_active_lane_mask: {
4314 auto *Op0 = dyn_cast<ConstantInt>(Val: Operands[0]);
4315 auto *Op1 = dyn_cast<ConstantInt>(Val: Operands[1]);
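    // If the base index is not below the trip count, every lane is inactive.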
4316 if (Op0 && Op1 && Op0->getValue().uge(RHS: Op1->getValue()))
4317 return ConstantVector::getNullValue(Ty: SVTy);
4318 break;
4319 }
4320 case Intrinsic::vector_interleave2:
4321 case Intrinsic::vector_interleave3:
4322 case Intrinsic::vector_interleave4:
4323 case Intrinsic::vector_interleave5:
4324 case Intrinsic::vector_interleave6:
4325 case Intrinsic::vector_interleave7:
4326 case Intrinsic::vector_interleave8: {
4327 Constant *SplatVal = Operands[0]->getSplatValue();
4328 if (!SplatVal)
4329 return nullptr;
4330
4331 if (!llvm::all_equal(Range&: Operands))
4332 return nullptr;
4333
4334 return ConstantVector::getSplat(EC: SVTy->getElementCount(), Elt: SplatVal);
4335 }
4336 default:
4337 break;
4338 }
4339
4340 // If trivially vectorizable, try folding it via the scalar call if all
4341 // operands are splats.
4342
4343 // TODO: ConstantFoldFixedVectorCall should probably check this too?
4344 if (!isTriviallyVectorizable(ID: IntrinsicID))
4345 return nullptr;
4346
4347 SmallVector<Constant *, 4> SplatOps;
4348 for (auto [I, Op] : enumerate(First&: Operands)) {
4349 if (isVectorIntrinsicWithScalarOpAtArg(ID: IntrinsicID, ScalarOpdIdx: I, /*TTI=*/nullptr)) {
4350 SplatOps.push_back(Elt: Op);
4351 continue;
4352 }
4353 Constant *Splat = Op->getSplatValue();
4354 if (!Splat)
4355 return nullptr;
4356 SplatOps.push_back(Elt: Splat);
4357 }
4358 Constant *Folded = ConstantFoldScalarCall(
4359 Name, IntrinsicID, Ty: SVTy->getElementType(), Operands: SplatOps, TLI, Call);
4360 if (!Folded)
4361 return nullptr;
4362 return ConstantVector::getSplat(EC: SVTy->getElementCount(), Elt: Folded);
4363}
4364
4365static std::pair<Constant *, Constant *>
4366ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) {
4367 if (isa<PoisonValue>(Val: Op))
4368 return {Op, PoisonValue::get(T: IntTy)};
4369
4370 auto *ConstFP = dyn_cast<ConstantFP>(Val: Op);
4371 if (!ConstFP)
4372 return {};
4373
4374 const APFloat &U = ConstFP->getValueAPF();
4375 int FrexpExp;
4376 APFloat FrexpMant = frexp(X: U, Exp&: FrexpExp, RM: APFloat::rmNearestTiesToEven);
4377 Constant *Result0 = ConstantFP::get(Ty: ConstFP->getType(), V: FrexpMant);
4378
4379 // The exponent is an "unspecified value" for inf/nan. We use zero to avoid
4380 // using undef.
4381 Constant *Result1 = FrexpMant.isFinite()
4382 ? ConstantInt::getSigned(Ty: IntTy, V: FrexpExp)
4383 : ConstantInt::getNullValue(Ty: IntTy);
4384 return {Result0, Result1};
4385}
4386
4387/// Handle intrinsics that return tuples, which may be tuples of vectors.
4388static Constant *
4389ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
4390 StructType *StTy, ArrayRef<Constant *> Operands,
4391 const DataLayout &DL, const TargetLibraryInfo *TLI,
4392 const CallBase *Call) {
4393
4394 switch (IntrinsicID) {
4395 case Intrinsic::frexp: {
4396 Type *Ty0 = StTy->getContainedType(i: 0);
4397 Type *Ty1 = StTy->getContainedType(i: 1)->getScalarType();
4398
4399 if (auto *FVTy0 = dyn_cast<FixedVectorType>(Val: Ty0)) {
4400 SmallVector<Constant *, 4> Results0(FVTy0->getNumElements());
4401 SmallVector<Constant *, 4> Results1(FVTy0->getNumElements());
4402
4403 for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) {
4404 Constant *Lane = Operands[0]->getAggregateElement(Elt: I);
4405 std::tie(args&: Results0[I], args&: Results1[I]) =
4406 ConstantFoldScalarFrexpCall(Op: Lane, IntTy: Ty1);
4407 if (!Results0[I])
4408 return nullptr;
4409 }
4410
4411 return ConstantStruct::get(T: StTy, Vs: ConstantVector::get(V: Results0),
4412 Vs: ConstantVector::get(V: Results1));
4413 }
4414
4415 auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Op: Operands[0], IntTy: Ty1);
4416 if (!Result0)
4417 return nullptr;
4418 return ConstantStruct::get(T: StTy, Vs: Result0, Vs: Result1);
4419 }
4420 case Intrinsic::sincos: {
4421 Type *Ty = StTy->getContainedType(i: 0);
4422 Type *TyScalar = Ty->getScalarType();
4423
4424 auto ConstantFoldScalarSincosCall =
4425 [&](Constant *Op) -> std::pair<Constant *, Constant *> {
4426 Constant *SinResult =
4427 ConstantFoldScalarCall(Name, IntrinsicID: Intrinsic::sin, Ty: TyScalar, Operands: Op, TLI, Call);
4428 Constant *CosResult =
4429 ConstantFoldScalarCall(Name, IntrinsicID: Intrinsic::cos, Ty: TyScalar, Operands: Op, TLI, Call);
4430 return std::make_pair(x&: SinResult, y&: CosResult);
4431 };
4432
4433 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: Ty)) {
4434 SmallVector<Constant *> SinResults(FVTy->getNumElements());
4435 SmallVector<Constant *> CosResults(FVTy->getNumElements());
4436
4437 for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
4438 Constant *Lane = Operands[0]->getAggregateElement(Elt: I);
4439 std::tie(args&: SinResults[I], args&: CosResults[I]) =
4440 ConstantFoldScalarSincosCall(Lane);
4441 if (!SinResults[I] || !CosResults[I])
4442 return nullptr;
4443 }
4444
4445 return ConstantStruct::get(T: StTy, Vs: ConstantVector::get(V: SinResults),
4446 Vs: ConstantVector::get(V: CosResults));
4447 }
4448
4449 auto [SinResult, CosResult] = ConstantFoldScalarSincosCall(Operands[0]);
4450 if (!SinResult || !CosResult)
4451 return nullptr;
4452 return ConstantStruct::get(T: StTy, Vs: SinResult, Vs: CosResult);
4453 }
4454 case Intrinsic::vector_deinterleave2:
4455 case Intrinsic::vector_deinterleave3:
4456 case Intrinsic::vector_deinterleave4:
4457 case Intrinsic::vector_deinterleave5:
4458 case Intrinsic::vector_deinterleave6:
4459 case Intrinsic::vector_deinterleave7:
4460 case Intrinsic::vector_deinterleave8: {
4461 unsigned NumResults = StTy->getNumElements();
4462 auto *Vec = Operands[0];
4463 auto *VecTy = cast<VectorType>(Val: Vec->getType());
4464
4465 ElementCount ResultEC =
4466 VecTy->getElementCount().divideCoefficientBy(RHS: NumResults);
4467
4468 if (auto *EltC = Vec->getSplatValue()) {
4469 auto *ResultVec = ConstantVector::getSplat(EC: ResultEC, Elt: EltC);
4470 SmallVector<Constant *, 8> Results(NumResults, ResultVec);
4471 return ConstantStruct::get(T: StTy, V: Results);
4472 }
4473
4474 if (!ResultEC.isFixed())
4475 return nullptr;
4476
4477 unsigned NumElements = ResultEC.getFixedValue();
4478 SmallVector<Constant *, 8> Results(NumResults);
4479 SmallVector<Constant *> Elements(NumElements);
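    // Result vector I gathers elements I, I + NumResults, I + 2 * NumResults,
    // ... of the interleaved input.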
4480 for (unsigned I = 0; I != NumResults; ++I) {
4481 for (unsigned J = 0; J != NumElements; ++J) {
4482 Constant *Elt = Vec->getAggregateElement(Elt: J * NumResults + I);
4483 if (!Elt)
4484 return nullptr;
4485 Elements[J] = Elt;
4486 }
4487 Results[I] = ConstantVector::get(V: Elements);
4488 }
4489 return ConstantStruct::get(T: StTy, V: Results);
4490 }
4491 default:
4492 // TODO: Constant folding of vector intrinsics that fall through here does
4493    // not work (e.g. overflow intrinsics).
4494 return ConstantFoldScalarCall(Name, IntrinsicID, Ty: StTy, Operands, TLI, Call);
4495 }
4496
4497 return nullptr;
4498}
4499
4500} // end anonymous namespace
4501
4502Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
4503 Constant *RHS, Type *Ty,
4504 Instruction *FMFSource) {
4505 auto *Call = dyn_cast_if_present<CallBase>(Val: FMFSource);
4506 // Ensure we check flags like StrictFP that might prevent this from getting
4507 // folded before generating a result.
4508 if (Call && !canConstantFoldCallTo(Call, F: Call->getCalledFunction()))
4509 return nullptr;
4510 return ConstantFoldIntrinsicCall2(IntrinsicID: ID, Ty, Operands: {LHS, RHS}, Call);
4511}
4512
4513Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
4514 ArrayRef<Constant *> Operands,
4515 const TargetLibraryInfo *TLI,
4516 bool AllowNonDeterministic) {
4517 if (Call->isNoBuiltin())
4518 return nullptr;
4519 if (!F->hasName())
4520 return nullptr;
4521
4522 // If this is not an intrinsic and not recognized as a library call, bail out.
4523 Intrinsic::ID IID = F->getIntrinsicID();
4524 if (IID == Intrinsic::not_intrinsic) {
4525 if (!TLI)
4526 return nullptr;
4527 LibFunc LibF;
4528 if (!TLI->getLibFunc(FDecl: *F, F&: LibF))
4529 return nullptr;
4530 }
4531
4532 // Conservatively assume that floating-point libcalls may be
4533 // non-deterministic.
4534 Type *Ty = F->getReturnType();
4535 if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy())
4536 return nullptr;
4537
4538 StringRef Name = F->getName();
4539 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: Ty))
4540 return ConstantFoldFixedVectorCall(
4541 Name, IntrinsicID: IID, FVTy, Operands, DL: F->getDataLayout(), TLI, Call);
4542
4543 if (auto *SVTy = dyn_cast<ScalableVectorType>(Val: Ty))
4544 return ConstantFoldScalableVectorCall(
4545 Name, IntrinsicID: IID, SVTy, Operands, DL: F->getDataLayout(), TLI, Call);
4546
4547 if (auto *StTy = dyn_cast<StructType>(Val: Ty))
4548 return ConstantFoldStructCall(Name, IntrinsicID: IID, StTy, Operands,
4549 DL: F->getDataLayout(), TLI, Call);
4550
4551 // TODO: If this is a library function, we already discovered that above,
4552 // so we should pass the LibFunc, not the name (and it might be better
4553 // still to separate intrinsic handling from libcalls).
4554 return ConstantFoldScalarCall(Name, IntrinsicID: IID, Ty, Operands, TLI, Call);
4555}
4556
4557bool llvm::isMathLibCallNoop(const CallBase *Call,
4558 const TargetLibraryInfo *TLI) {
4559 // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
4560 // (and to some extent ConstantFoldScalarCall).
4561 if (Call->isNoBuiltin() || Call->isStrictFP())
4562 return false;
4563 Function *F = Call->getCalledFunction();
4564 if (!F)
4565 return false;
4566
4567 LibFunc Func;
4568 if (!TLI || !TLI->getLibFunc(FDecl: *F, F&: Func))
4569 return false;
4570
4571 if (Call->arg_size() == 1) {
4572 if (ConstantFP *OpC = dyn_cast<ConstantFP>(Val: Call->getArgOperand(i: 0))) {
4573 const APFloat &Op = OpC->getValueAPF();
4574 switch (Func) {
4575 case LibFunc_logl:
4576 case LibFunc_log:
4577 case LibFunc_logf:
4578 case LibFunc_log2l:
4579 case LibFunc_log2:
4580 case LibFunc_log2f:
4581 case LibFunc_log10l:
4582 case LibFunc_log10:
4583 case LibFunc_log10f:
4584 return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
4585
4586 case LibFunc_ilogb:
4587 return !Op.isNaN() && !Op.isZero() && !Op.isInfinity();
4588
4589 case LibFunc_expl:
4590 case LibFunc_exp:
4591 case LibFunc_expf:
4592 // FIXME: These boundaries are slightly conservative.
4593 if (OpC->getType()->isDoubleTy())
4594 return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
4595 if (OpC->getType()->isFloatTy())
4596 return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
4597 break;
4598
4599 case LibFunc_exp2l:
4600 case LibFunc_exp2:
4601 case LibFunc_exp2f:
4602 // FIXME: These boundaries are slightly conservative.
4603 if (OpC->getType()->isDoubleTy())
4604 return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
4605 if (OpC->getType()->isFloatTy())
4606 return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
4607 break;
4608
4609 case LibFunc_sinl:
4610 case LibFunc_sin:
4611 case LibFunc_sinf:
4612 case LibFunc_cosl:
4613 case LibFunc_cos:
4614 case LibFunc_cosf:
4615 return !Op.isInfinity();
4616
4617 case LibFunc_tanl:
4618 case LibFunc_tan:
4619 case LibFunc_tanf: {
4620 // FIXME: Stop using the host math library.
4621 // FIXME: The computation isn't done in the right precision.
4622 Type *Ty = OpC->getType();
4623 if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
4624 return ConstantFoldFP(NativeFP: tan, V: OpC->getValueAPF(), Ty) != nullptr;
4625 break;
4626 }
4627
4628 case LibFunc_atan:
4629 case LibFunc_atanf:
4630 case LibFunc_atanl:
4631      // Per POSIX, this MAY fail if Op is denormal. We choose not to fail.
4632 return true;
4633
      case LibFunc_asinl:
      case LibFunc_asin:
      case LibFunc_asinf:
      case LibFunc_acosl:
      case LibFunc_acos:
      case LibFunc_acosf:
        return !(Op < APFloat::getOne(Sem: Op.getSemantics(), Negative: true) ||
                 Op > APFloat::getOne(Sem: Op.getSemantics()));

      case LibFunc_sinh:
      case LibFunc_cosh:
      case LibFunc_sinhf:
      case LibFunc_coshf:
      case LibFunc_sinhl:
      case LibFunc_coshl:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
        break;

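      // sqrt reports a domain error only for negative non-zero, non-NaN
      // arguments (sqrt(-0.0) is well-defined).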
      case LibFunc_sqrtl:
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
        return Op.isNaN() || Op.isZero() || !Op.isNegative();

      // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
      // maybe others?
      default:
        break;
      }
    }
  }

  if (Call->arg_size() == 2) {
    ConstantFP *Op0C = dyn_cast<ConstantFP>(Val: Call->getArgOperand(i: 0));
    ConstantFP *Op1C = dyn_cast<ConstantFP>(Val: Call->getArgOperand(i: 1));
    if (Op0C && Op1C) {
      const APFloat &Op0 = Op0C->getValueAPF();
      const APFloat &Op1 = Op1C->getValueAPF();

      switch (Func) {
      case LibFunc_powl:
      case LibFunc_pow:
      case LibFunc_powf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = Op0C->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
          if (Ty == Op1C->getType())
            return ConstantFoldBinaryFP(NativeFP: pow, V: Op0, W: Op1, Ty) != nullptr;
        }
        break;
      }

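      // fmod and remainder report a domain error when the first operand is
      // infinite or the second is zero, unless either operand is a NaN.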
      case LibFunc_fmodl:
      case LibFunc_fmod:
      case LibFunc_fmodf:
      case LibFunc_remainderl:
      case LibFunc_remainder:
      case LibFunc_remainderf:
        return Op0.isNaN() || Op1.isNaN() ||
               (!Op0.isInfinity() && !Op1.isZero());

      case LibFunc_atan2:
      case LibFunc_atan2f:
      case LibFunc_atan2l:
        // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and
        // GLIBC and MSVC do not appear to raise an error on those, we
        // cannot rely on that behavior. POSIX and C11 say that a domain error
        // may occur, so allow for that possibility.
        return !Op0.isZero() || !Op1.isZero();

      default:
        break;
      }
    }
  }

  return false;
}

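// Invert the given cast operation on the constant C: return a constant InvC
// of type InvCastTo such that applying CastOp to InvC reproduces C exactly,
// or nullptr if the cast cannot be inverted losslessly. If Flags is non-null,
// it is populated with the nuw/nsw/nneg flags that the reproducing cast is
// known to satisfy.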
Constant *llvm::getLosslessInvCast(Constant *C, Type *InvCastTo,
                                   unsigned CastOp, const DataLayout &DL,
                                   PreservedCastFlags *Flags) {
  switch (CastOp) {
  case Instruction::BitCast:
    // Bitcast is always lossless.
    return ConstantFoldCastOperand(Opcode: Instruction::BitCast, C, DestTy: InvCastTo, DL);
  case Instruction::Trunc: {
    auto *ZExtC = ConstantFoldCastOperand(Opcode: Instruction::ZExt, C, DestTy: InvCastTo, DL);
    if (Flags) {
      // Truncating the zero-extended value never drops set bits, so the
      // trunc is always NUW.
      Flags->NUW = true;
      // The trunc is NSW iff C is non-negative, i.e. its sign- and
      // zero-extensions agree.
      auto *SExtC =
          ConstantFoldCastOperand(Opcode: Instruction::SExt, C, DestTy: InvCastTo, DL);
      Flags->NSW = ZExtC == SExtC;
    }
    return ZExtC;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    auto *InvC = ConstantExpr::getTrunc(C, Ty: InvCastTo);
    auto *CastInvC = ConstantFoldCastOperand(Opcode: CastOp, C: InvC, DestTy: C->getType(), DL);
    // Must satisfy CastOp(InvC) == C.
    if (!CastInvC || CastInvC != C)
      return nullptr;
    if (Flags && CastOp == Instruction::ZExt) {
      auto *SExtInvC =
          ConstantFoldCastOperand(Opcode: Instruction::SExt, C: InvC, DestTy: C->getType(), DL);
      // The zext is NNeg iff InvC is non-negative, i.e. its sign- and
      // zero-extensions agree.
      Flags->NNeg = CastInvC == SExtInvC;
    }
    return InvC;
  }
  default:
    return nullptr;
  }
}

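// Truncate C to DestTy, provided the result zero-extends back to C without
// loss; otherwise return nullptr.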
Constant *llvm::getLosslessUnsignedTrunc(Constant *C, Type *DestTy,
                                         const DataLayout &DL,
                                         PreservedCastFlags *Flags) {
  return getLosslessInvCast(C, InvCastTo: DestTy, CastOp: Instruction::ZExt, DL, Flags);
}

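// Truncate C to DestTy, provided the result sign-extends back to C without
// loss; otherwise return nullptr.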
Constant *llvm::getLosslessSignedTrunc(Constant *C, Type *DestTy,
                                       const DataLayout &DL,
                                       PreservedCastFlags *Flags) {
  return getLosslessInvCast(C, InvCastTo: DestTy, CastOp: Instruction::SExt, DL, Flags);
}

void TargetFolder::anchor() {}