1//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the library calls simplifier. It does not implement
10// any pass, but can be used by other passes to do simplifications.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/SmallString.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/Analysis/ConstantFolding.h"
20#include "llvm/Analysis/Loads.h"
21#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22#include "llvm/Analysis/TargetLibraryInfo.h"
23#include "llvm/Analysis/Utils/Local.h"
24#include "llvm/Analysis/ValueTracking.h"
25#include "llvm/IR/AttributeMask.h"
26#include "llvm/IR/DataLayout.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/IntrinsicInst.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/PatternMatch.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Support/CommandLine.h"
35#include "llvm/Support/KnownBits.h"
36#include "llvm/Support/KnownFPClass.h"
37#include "llvm/Support/MathExtras.h"
38#include "llvm/TargetParser/Triple.h"
39#include "llvm/Transforms/Utils/BuildLibCalls.h"
40#include "llvm/Transforms/Utils/Local.h"
41#include "llvm/Transforms/Utils/SizeOpts.h"
42
43#include <cmath>
44
45using namespace llvm;
46using namespace PatternMatch;
47
48static cl::opt<bool>
49 EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
50 cl::init(Val: false),
51 cl::desc("Enable unsafe double to float "
52 "shrinking for math lib calls"));
53
54// Enable conversion of operator new calls with a MemProf hot or cold hint
55// to an operator new call that takes a hot/cold hint. Off by default since
56// not all allocators currently support this extension.
57static cl::opt<bool>
58 OptimizeHotColdNew("optimize-hot-cold-new", cl::Hidden, cl::init(Val: false),
59 cl::desc("Enable hot/cold operator new library calls"));
60static cl::opt<bool> OptimizeExistingHotColdNew(
61 "optimize-existing-hot-cold-new", cl::Hidden, cl::init(Val: false),
62 cl::desc(
63 "Enable optimization of existing hot/cold operator new library calls"));
64static cl::opt<bool> OptimizeNoBuiltinHotColdNew(
65 "optimize-nobuiltin-hot-cold-new-new", cl::Hidden, cl::init(Val: false),
66 cl::desc("Enable transformation of nobuiltin operator new library calls"));
67
68namespace {
69
70// Specialized parser to ensure the hint is an 8 bit value (we can't specify
71// uint8_t to opt<> as that is interpreted to mean that we are passing a char
72// option with a specific set of values.
73struct HotColdHintParser : public cl::parser<unsigned> {
74 HotColdHintParser(cl::Option &O) : cl::parser<unsigned>(O) {}
75
76 bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) {
77 if (Arg.getAsInteger(Radix: 0, Result&: Value))
78 return O.error(Message: "'" + Arg + "' value invalid for uint argument!");
79
80 if (Value > 255)
81 return O.error(Message: "'" + Arg + "' value must be in the range [0, 255]!");
82
83 return false;
84 }
85};
86
87} // end anonymous namespace
88
89// Hot/cold operator new takes an 8 bit hotness hint, where 0 is the coldest
90// and 255 is the hottest. Default to 1 value away from the coldest and hottest
91// hints, so that the compiler hinted allocations are slightly less strong than
92// manually inserted hints at the two extremes.
93static cl::opt<unsigned, false, HotColdHintParser> ColdNewHintValue(
94 "cold-new-hint-value", cl::Hidden, cl::init(Val: 1),
95 cl::desc("Value to pass to hot/cold operator new for cold allocation"));
96static cl::opt<unsigned, false, HotColdHintParser>
97 NotColdNewHintValue("notcold-new-hint-value", cl::Hidden, cl::init(Val: 128),
98 cl::desc("Value to pass to hot/cold operator new for "
99 "notcold (warm) allocation"));
100static cl::opt<unsigned, false, HotColdHintParser> HotNewHintValue(
101 "hot-new-hint-value", cl::Hidden, cl::init(Val: 254),
102 cl::desc("Value to pass to hot/cold operator new for hot allocation"));
103static cl::opt<unsigned, false, HotColdHintParser> AmbiguousNewHintValue(
104 "ambiguous-new-hint-value", cl::Hidden, cl::init(Val: 222),
105 cl::desc(
106 "Value to pass to hot/cold operator new for ambiguous allocation"));
107
108//===----------------------------------------------------------------------===//
109// Helper Functions
110//===----------------------------------------------------------------------===//
111
112static bool ignoreCallingConv(LibFunc Func) {
113 return Func == LibFunc_abs || Func == LibFunc_labs ||
114 Func == LibFunc_llabs || Func == LibFunc_strlen;
115}
116
117/// Return true if it is only used in equality comparisons with With.
118static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
119 for (User *U : V->users()) {
120 if (ICmpInst *IC = dyn_cast<ICmpInst>(Val: U))
121 if (IC->isEquality() && IC->getOperand(i_nocapture: 1) == With)
122 continue;
123 // Unknown instruction.
124 return false;
125 }
126 return true;
127}
128
129static bool callHasFloatingPointArgument(const CallInst *CI) {
130 return any_of(Range: CI->operands(), P: [](const Use &OI) {
131 return OI->getType()->isFloatingPointTy();
132 });
133}
134
135static bool callHasFP128Argument(const CallInst *CI) {
136 return any_of(Range: CI->operands(), P: [](const Use &OI) {
137 return OI->getType()->isFP128Ty();
138 });
139}
140
141// Convert the entire string Str representing an integer in Base, up to
142// the terminating nul if present, to a constant according to the rules
143// of strtoul[l] or, when AsSigned is set, of strtol[l]. On success
144// return the result, otherwise null.
145// The function assumes the string is encoded in ASCII and carefully
146// avoids converting sequences (including "") that the corresponding
147// library call might fail and set errno for.
148static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
149 uint64_t Base, bool AsSigned, IRBuilderBase &B) {
150 if (Base < 2 || Base > 36)
151 if (Base != 0)
152 // Fail for an invalid base (required by POSIX).
153 return nullptr;
154
155 // Current offset into the original string to reflect in EndPtr.
156 size_t Offset = 0;
157 // Strip leading whitespace.
158 for ( ; Offset != Str.size(); ++Offset)
159 if (!isSpace(C: (unsigned char)Str[Offset])) {
160 Str = Str.substr(Start: Offset);
161 break;
162 }
163
164 if (Str.empty())
165 // Fail for empty subject sequences (POSIX allows but doesn't require
166 // strtol[l]/strtoul[l] to fail with EINVAL).
167 return nullptr;
168
169 // Strip but remember the sign.
170 bool Negate = Str[0] == '-';
171 if (Str[0] == '-' || Str[0] == '+') {
172 Str = Str.drop_front();
173 if (Str.empty())
174 // Fail for a sign with nothing after it.
175 return nullptr;
176 ++Offset;
177 }
178
179 // Set Max to the absolute value of the minimum (for signed), or
180 // to the maximum (for unsigned) value representable in the type.
181 Type *RetTy = CI->getType();
182 unsigned NBits = RetTy->getPrimitiveSizeInBits();
183 uint64_t Max = AsSigned && Negate ? 1 : 0;
184 Max += AsSigned ? maxIntN(N: NBits) : maxUIntN(N: NBits);
185
186 // Autodetect Base if it's zero and consume the "0x" prefix.
187 if (Str.size() > 1) {
188 if (Str[0] == '0') {
189 if (toUpper(x: (unsigned char)Str[1]) == 'X') {
190 if (Str.size() == 2 || (Base && Base != 16))
191 // Fail if Base doesn't allow the "0x" prefix or for the prefix
192 // alone that implementations like BSD set errno to EINVAL for.
193 return nullptr;
194
195 Str = Str.drop_front(N: 2);
196 Offset += 2;
197 Base = 16;
198 }
199 else if (Base == 0)
200 Base = 8;
201 } else if (Base == 0)
202 Base = 10;
203 }
204 else if (Base == 0)
205 Base = 10;
206
207 // Convert the rest of the subject sequence, not including the sign,
208 // to its uint64_t representation (this assumes the source character
209 // set is ASCII).
210 uint64_t Result = 0;
211 for (unsigned i = 0; i != Str.size(); ++i) {
212 unsigned char DigVal = Str[i];
213 if (isDigit(C: DigVal))
214 DigVal = DigVal - '0';
215 else {
216 DigVal = toUpper(x: DigVal);
217 if (isAlpha(C: DigVal))
218 DigVal = DigVal - 'A' + 10;
219 else
220 return nullptr;
221 }
222
223 if (DigVal >= Base)
224 // Fail if the digit is not valid in the Base.
225 return nullptr;
226
227 // Add the digit and fail if the result is not representable in
228 // the (unsigned form of the) destination type.
229 bool VFlow;
230 Result = SaturatingMultiplyAdd(X: Result, Y: Base, A: (uint64_t)DigVal, ResultOverflowed: &VFlow);
231 if (VFlow || Result > Max)
232 return nullptr;
233 }
234
235 if (EndPtr) {
236 // Store the pointer to the end.
237 Value *Off = B.getInt64(C: Offset + Str.size());
238 Value *StrBeg = CI->getArgOperand(i: 0);
239 Value *StrEnd = B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: StrBeg, IdxList: Off, Name: "endptr");
240 B.CreateStore(Val: StrEnd, Ptr: EndPtr);
241 }
242
243 if (Negate) {
244 // Unsigned negation doesn't overflow.
245 Result = -Result;
246 // For unsigned numbers, discard sign bits.
247 if (!AsSigned)
248 Result &= maxUIntN(N: NBits);
249 }
250
251 return ConstantInt::get(Ty: RetTy, V: Result, IsSigned: AsSigned);
252}
253
254static bool isOnlyUsedInComparisonWithZero(Value *V) {
255 for (User *U : V->users()) {
256 if (ICmpInst *IC = dyn_cast<ICmpInst>(Val: U))
257 if (Constant *C = dyn_cast<Constant>(Val: IC->getOperand(i_nocapture: 1)))
258 if (C->isNullValue())
259 continue;
260 // Unknown instruction.
261 return false;
262 }
263 return true;
264}
265
266static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
267 const SimplifyQuery &SQ) {
268 if (!isOnlyUsedInComparisonWithZero(V: CI))
269 return false;
270
271 if (!isDereferenceablePointer(V: Str, Size: APInt(64, Len), Q: SQ))
272 return false;
273
274 if (CI->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeMemory))
275 return false;
276
277 return true;
278}
279
280static void annotateDereferenceableBytes(CallInst *CI,
281 ArrayRef<unsigned> ArgNos,
282 uint64_t DereferenceableBytes) {
283 const Function *F = CI->getCaller();
284 if (!F)
285 return;
286 for (unsigned ArgNo : ArgNos) {
287 uint64_t DerefBytes = DereferenceableBytes;
288 unsigned AS = CI->getArgOperand(i: ArgNo)->getType()->getPointerAddressSpace();
289 if (!llvm::NullPointerIsDefined(F, AS) ||
290 CI->paramHasAttr(ArgNo, Kind: Attribute::NonNull))
291 DerefBytes = std::max(a: CI->getParamDereferenceableOrNullBytes(i: ArgNo),
292 b: DereferenceableBytes);
293
294 if (CI->getParamDereferenceableBytes(i: ArgNo) < DerefBytes) {
295 CI->removeParamAttr(ArgNo, Kind: Attribute::Dereferenceable);
296 if (!llvm::NullPointerIsDefined(F, AS) ||
297 CI->paramHasAttr(ArgNo, Kind: Attribute::NonNull))
298 CI->removeParamAttr(ArgNo, Kind: Attribute::DereferenceableOrNull);
299 CI->addParamAttr(ArgNo, Attr: Attribute::getWithDereferenceableBytes(
300 Context&: CI->getContext(), Bytes: DerefBytes));
301 }
302 }
303}
304
305static void annotateNonNullNoUndefBasedOnAccess(CallInst *CI,
306 ArrayRef<unsigned> ArgNos) {
307 Function *F = CI->getCaller();
308 if (!F)
309 return;
310
311 for (unsigned ArgNo : ArgNos) {
312 if (!CI->paramHasAttr(ArgNo, Kind: Attribute::NoUndef))
313 CI->addParamAttr(ArgNo, Kind: Attribute::NoUndef);
314
315 if (!CI->paramHasAttr(ArgNo, Kind: Attribute::NonNull)) {
316 unsigned AS =
317 CI->getArgOperand(i: ArgNo)->getType()->getPointerAddressSpace();
318 if (llvm::NullPointerIsDefined(F, AS))
319 continue;
320 CI->addParamAttr(ArgNo, Kind: Attribute::NonNull);
321 }
322
323 annotateDereferenceableBytes(CI, ArgNos: ArgNo, DereferenceableBytes: 1);
324 }
325}
326
327static void annotateNonNullAndDereferenceable(CallInst *CI, ArrayRef<unsigned> ArgNos,
328 Value *Size, const DataLayout &DL) {
329 if (ConstantInt *LenC = dyn_cast<ConstantInt>(Val: Size)) {
330 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
331 annotateDereferenceableBytes(CI, ArgNos, DereferenceableBytes: LenC->getZExtValue());
332 } else if (isKnownNonZero(V: Size, Q: DL)) {
333 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos);
334 uint64_t X, Y;
335 uint64_t DerefMin = 1;
336 if (match(V: Size, P: m_Select(C: m_Value(), L: m_ConstantInt(V&: X), R: m_ConstantInt(V&: Y)))) {
337 DerefMin = std::min(a: X, b: Y);
338 annotateDereferenceableBytes(CI, ArgNos, DereferenceableBytes: DerefMin);
339 }
340 }
341}
342
343// Copy CallInst "flags" like musttail, notail, and tail. Return New param for
344// easier chaining. Calls to emit* and B.createCall should probably be wrapped
345// in this function when New is created to replace Old. Callers should take
346// care to check Old.isMustTailCall() if they aren't replacing Old directly
347// with New.
348static Value *copyFlags(const CallInst &Old, Value *New) {
349 assert(!Old.isMustTailCall() && "do not copy musttail call flags");
350 assert(!Old.isNoTailCall() && "do not copy notail call flags");
351 if (auto *NewCI = dyn_cast_or_null<CallInst>(Val: New))
352 NewCI->setTailCallKind(Old.getTailCallKind());
353 return New;
354}
355
356static Value *mergeAttributesAndFlags(CallInst *NewCI, const CallInst &Old) {
357 NewCI->setAttributes(AttributeList::get(
358 C&: NewCI->getContext(), Attrs: {NewCI->getAttributes(), Old.getAttributes()}));
359 NewCI->removeRetAttrs(AttrsToRemove: AttributeFuncs::typeIncompatible(
360 Ty: NewCI->getType(), AS: NewCI->getRetAttributes()));
361 for (unsigned I = 0; I < NewCI->arg_size(); ++I)
362 NewCI->removeParamAttrs(
363 ArgNo: I, AttrsToRemove: AttributeFuncs::typeIncompatible(Ty: NewCI->getArgOperand(i: I)->getType(),
364 AS: NewCI->getParamAttributes(ArgNo: I)));
365
366 return copyFlags(Old, New: NewCI);
367}
368
369// Helper to avoid truncating the length if size_t is 32-bits.
370static StringRef substr(StringRef Str, uint64_t Len) {
371 return Len >= Str.size() ? Str : Str.substr(Start: 0, N: Len);
372}
373
374//===----------------------------------------------------------------------===//
375// String and Memory Library Call Optimizations
376//===----------------------------------------------------------------------===//
377
378Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) {
379 // Extract some information from the instruction
380 Value *Dst = CI->getArgOperand(i: 0);
381 Value *Src = CI->getArgOperand(i: 1);
382 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: {0, 1});
383
384 // See if we can get the length of the input string.
385 uint64_t Len = GetStringLength(V: Src);
386 if (Len)
387 annotateDereferenceableBytes(CI, ArgNos: 1, DereferenceableBytes: Len);
388 else
389 return nullptr;
390 --Len; // Unbias length.
391
392 // Handle the simple, do-nothing case: strcat(x, "") -> x
393 if (Len == 0)
394 return Dst;
395
396 return copyFlags(Old: *CI, New: emitStrLenMemCpy(Src, Dst, Len, B));
397}
398
399Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
400 IRBuilderBase &B) {
401 // We need to find the end of the destination string. That's where the
402 // memory is to be moved to. We just generate a call to strlen.
403 Value *DstLen = emitStrLen(Ptr: Dst, B, DL, TLI);
404 if (!DstLen)
405 return nullptr;
406
407 // Now that we have the destination's length, we must index into the
408 // destination's pointer to get the actual memcpy destination (end of
409 // the string .. we're concatenating).
410 Value *CpyDst = B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: Dst, IdxList: DstLen, Name: "endptr");
411
412 // We have enough information to now generate the memcpy call to do the
413 // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
414 B.CreateMemCpy(Dst: CpyDst, DstAlign: Align(1), Src, SrcAlign: Align(1),
415 Size: TLI->getAsSizeT(V: Len + 1, M: *B.GetInsertBlock()->getModule()));
416 return Dst;
417}
418
419Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
420 // Extract some information from the instruction.
421 Value *Dst = CI->getArgOperand(i: 0);
422 Value *Src = CI->getArgOperand(i: 1);
423 Value *Size = CI->getArgOperand(i: 2);
424 uint64_t Len;
425 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
426 if (isKnownNonZero(V: Size, Q: DL))
427 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 1);
428
429 // We don't do anything if length is not constant.
430 ConstantInt *LengthArg = dyn_cast<ConstantInt>(Val: Size);
431 if (LengthArg) {
432 Len = LengthArg->getZExtValue();
433 // strncat(x, c, 0) -> x
434 if (!Len)
435 return Dst;
436 } else {
437 return nullptr;
438 }
439
440 // See if we can get the length of the input string.
441 uint64_t SrcLen = GetStringLength(V: Src);
442 if (SrcLen) {
443 annotateDereferenceableBytes(CI, ArgNos: 1, DereferenceableBytes: SrcLen);
444 --SrcLen; // Unbias length.
445 } else {
446 return nullptr;
447 }
448
449 // strncat(x, "", c) -> x
450 if (SrcLen == 0)
451 return Dst;
452
453 // We don't optimize this case.
454 if (Len < SrcLen)
455 return nullptr;
456
457 // strncat(x, s, c) -> strcat(x, s)
458 // s is constant so the strcat can be optimized further.
459 return copyFlags(Old: *CI, New: emitStrLenMemCpy(Src, Dst, Len: SrcLen, B));
460}
461
462// Helper to transform memchr(S, C, N) == S to N && *S == C and, when
463// NBytes is null, strchr(S, C) to *S == C. A precondition of the function
464// is that either S is dereferenceable or the value of N is nonzero.
465static Value* memChrToCharCompare(CallInst *CI, Value *NBytes,
466 IRBuilderBase &B, const DataLayout &DL)
467{
468 Value *Src = CI->getArgOperand(i: 0);
469 Value *CharVal = CI->getArgOperand(i: 1);
470
471 // Fold memchr(A, C, N) == A to N && *A == C.
472 Type *CharTy = B.getInt8Ty();
473 Value *Char0 = B.CreateLoad(Ty: CharTy, Ptr: Src);
474 CharVal = B.CreateTrunc(V: CharVal, DestTy: CharTy);
475 Value *Cmp = B.CreateICmpEQ(LHS: Char0, RHS: CharVal, Name: "char0cmp");
476
477 if (NBytes) {
478 Value *Zero = ConstantInt::get(Ty: NBytes->getType(), V: 0);
479 Value *And = B.CreateICmpNE(LHS: NBytes, RHS: Zero);
480 Cmp = B.CreateLogicalAnd(Cond1: And, Cond2: Cmp);
481 }
482
483 Value *NullPtr = Constant::getNullValue(Ty: CI->getType());
484 return B.CreateSelect(C: Cmp, True: Src, False: NullPtr);
485}
486
487Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
488 Value *SrcStr = CI->getArgOperand(i: 0);
489 Value *CharVal = CI->getArgOperand(i: 1);
490 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
491
492 if (isOnlyUsedInEqualityComparison(V: CI, With: SrcStr))
493 return memChrToCharCompare(CI, NBytes: nullptr, B, DL);
494
495 // If the second operand is non-constant, see if we can compute the length
496 // of the input string and turn this into memchr.
497 ConstantInt *CharC = dyn_cast<ConstantInt>(Val: CharVal);
498 if (!CharC) {
499 uint64_t Len = GetStringLength(V: SrcStr);
500 if (Len)
501 annotateDereferenceableBytes(CI, ArgNos: 0, DereferenceableBytes: Len);
502 else
503 return nullptr;
504
505 Function *Callee = CI->getCalledFunction();
506 FunctionType *FT = Callee->getFunctionType();
507 unsigned IntBits = TLI->getIntSize();
508 if (!FT->getParamType(i: 1)->isIntegerTy(BitWidth: IntBits)) // memchr needs 'int'.
509 return nullptr;
510
511 unsigned SizeTBits = TLI->getSizeTSize(M: *CI->getModule());
512 Type *SizeTTy = IntegerType::get(C&: CI->getContext(), NumBits: SizeTBits);
513 return copyFlags(Old: *CI,
514 New: emitMemChr(Ptr: SrcStr, Val: CharVal, // include nul.
515 Len: ConstantInt::get(Ty: SizeTTy, V: Len), B,
516 DL, TLI));
517 }
518
519 if (CharC->isZero()) {
520 Value *NullPtr = Constant::getNullValue(Ty: CI->getType());
521 if (isOnlyUsedInEqualityComparison(V: CI, With: NullPtr))
522 // Pre-empt the transformation to strlen below and fold
523 // strchr(A, '\0') == null to false.
524 return B.CreateIntToPtr(V: B.getTrue(), DestTy: CI->getType());
525 }
526
527 // Otherwise, the character is a constant, see if the first argument is
528 // a string literal. If so, we can constant fold.
529 StringRef Str;
530 if (!getConstantStringInfo(V: SrcStr, Str)) {
531 if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
532 if (Value *StrLen = emitStrLen(Ptr: SrcStr, B, DL, TLI))
533 return B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: SrcStr, IdxList: StrLen, Name: "strchr");
534 return nullptr;
535 }
536
537 // Compute the offset, make sure to handle the case when we're searching for
538 // zero (a weird way to spell strlen).
539 size_t I = (0xFF & CharC->getSExtValue()) == 0
540 ? Str.size()
541 : Str.find(C: CharC->getSExtValue());
542 if (I == StringRef::npos) // Didn't find the char. strchr returns null.
543 return Constant::getNullValue(Ty: CI->getType());
544
545 // strchr(s+n,c) -> gep(s+n+i,c)
546 return B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: SrcStr, IdxList: B.getInt64(C: I), Name: "strchr");
547}
548
549Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
550 Value *SrcStr = CI->getArgOperand(i: 0);
551 Value *CharVal = CI->getArgOperand(i: 1);
552 ConstantInt *CharC = dyn_cast<ConstantInt>(Val: CharVal);
553 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
554
555 StringRef Str;
556 if (!getConstantStringInfo(V: SrcStr, Str)) {
557 // strrchr(s, 0) -> strchr(s, 0)
558 if (CharC && CharC->isZero())
559 return copyFlags(Old: *CI, New: emitStrChr(Ptr: SrcStr, C: '\0', B, TLI));
560 return nullptr;
561 }
562
563 unsigned SizeTBits = TLI->getSizeTSize(M: *CI->getModule());
564 Type *SizeTTy = IntegerType::get(C&: CI->getContext(), NumBits: SizeTBits);
565
566 // Try to expand strrchr to the memrchr nonstandard extension if it's
567 // available, or simply fail otherwise.
568 uint64_t NBytes = Str.size() + 1; // Include the terminating nul.
569 Value *Size = ConstantInt::get(Ty: SizeTTy, V: NBytes);
570 return copyFlags(Old: *CI, New: emitMemRChr(Ptr: SrcStr, Val: CharVal, Len: Size, B, DL, TLI));
571}
572
573Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
574 Value *Str1P = CI->getArgOperand(i: 0), *Str2P = CI->getArgOperand(i: 1);
575 if (Str1P == Str2P) // strcmp(x,x) -> 0
576 return ConstantInt::get(Ty: CI->getType(), V: 0);
577
578 StringRef Str1, Str2;
579 bool HasStr1 = getConstantStringInfo(V: Str1P, Str&: Str1);
580 bool HasStr2 = getConstantStringInfo(V: Str2P, Str&: Str2);
581
582 // strcmp(x, y) -> cnst (if both x and y are constant strings)
583 if (HasStr1 && HasStr2)
584 return ConstantInt::getSigned(Ty: CI->getType(),
585 V: std::clamp(val: Str1.compare(RHS: Str2), lo: -1, hi: 1));
586
587 if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
588 return B.CreateNeg(V: B.CreateZExt(
589 V: B.CreateLoad(Ty: B.getInt8Ty(), Ptr: Str2P, Name: "strcmpload"), DestTy: CI->getType()));
590
591 if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
592 return B.CreateZExt(V: B.CreateLoad(Ty: B.getInt8Ty(), Ptr: Str1P, Name: "strcmpload"),
593 DestTy: CI->getType());
594
595 // strcmp(P, "x") -> memcmp(P, "x", 2)
596 uint64_t Len1 = GetStringLength(V: Str1P);
597 if (Len1)
598 annotateDereferenceableBytes(CI, ArgNos: 0, DereferenceableBytes: Len1);
599 uint64_t Len2 = GetStringLength(V: Str2P);
600 if (Len2)
601 annotateDereferenceableBytes(CI, ArgNos: 1, DereferenceableBytes: Len2);
602
603 if (Len1 && Len2) {
604 return copyFlags(
605 Old: *CI, New: emitMemCmp(Ptr1: Str1P, Ptr2: Str2P,
606 Len: TLI->getAsSizeT(V: std::min(a: Len1, b: Len2), M: *CI->getModule()),
607 B, DL, TLI));
608 }
609
610 // strcmp to memcmp
611 SimplifyQuery SQ(DL, TLI, DT, AC, CI);
612 if (!HasStr1 && HasStr2) {
613 if (canTransformToMemCmp(CI, Str: Str1P, Len: Len2, SQ))
614 return copyFlags(Old: *CI, New: emitMemCmp(Ptr1: Str1P, Ptr2: Str2P,
615 Len: TLI->getAsSizeT(V: Len2, M: *CI->getModule()),
616 B, DL, TLI));
617 } else if (HasStr1 && !HasStr2) {
618 if (canTransformToMemCmp(CI, Str: Str2P, Len: Len1, SQ))
619 return copyFlags(Old: *CI, New: emitMemCmp(Ptr1: Str1P, Ptr2: Str2P,
620 Len: TLI->getAsSizeT(V: Len1, M: *CI->getModule()),
621 B, DL, TLI));
622 }
623
624 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: {0, 1});
625 return nullptr;
626}
627
628// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
629// arrays LHS and RHS and nonconstant Size.
630static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
631 Value *Size, bool StrNCmp,
632 IRBuilderBase &B, const DataLayout &DL);
633
634Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
635 Value *Str1P = CI->getArgOperand(i: 0);
636 Value *Str2P = CI->getArgOperand(i: 1);
637 Value *Size = CI->getArgOperand(i: 2);
638 if (Str1P == Str2P) // strncmp(x,x,n) -> 0
639 return ConstantInt::get(Ty: CI->getType(), V: 0);
640
641 if (isKnownNonZero(V: Size, Q: DL))
642 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: {0, 1});
643 // Get the length argument if it is constant.
644 uint64_t Length;
645 if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Val: Size))
646 Length = LengthArg->getZExtValue();
647 else
648 return optimizeMemCmpVarSize(CI, LHS: Str1P, RHS: Str2P, Size, StrNCmp: true, B, DL);
649
650 if (Length == 0) // strncmp(x,y,0) -> 0
651 return ConstantInt::get(Ty: CI->getType(), V: 0);
652
653 if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
654 return copyFlags(Old: *CI, New: emitMemCmp(Ptr1: Str1P, Ptr2: Str2P, Len: Size, B, DL, TLI));
655
656 StringRef Str1, Str2;
657 bool HasStr1 = getConstantStringInfo(V: Str1P, Str&: Str1);
658 bool HasStr2 = getConstantStringInfo(V: Str2P, Str&: Str2);
659
660 // strncmp(x, y) -> cnst (if both x and y are constant strings)
661 if (HasStr1 && HasStr2) {
662 // Avoid truncating the 64-bit Length to 32 bits in ILP32.
663 StringRef SubStr1 = substr(Str: Str1, Len: Length);
664 StringRef SubStr2 = substr(Str: Str2, Len: Length);
665 return ConstantInt::getSigned(Ty: CI->getType(),
666 V: std::clamp(val: SubStr1.compare(RHS: SubStr2), lo: -1, hi: 1));
667 }
668
669 if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
670 return B.CreateNeg(V: B.CreateZExt(
671 V: B.CreateLoad(Ty: B.getInt8Ty(), Ptr: Str2P, Name: "strcmpload"), DestTy: CI->getType()));
672
673 if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
674 return B.CreateZExt(V: B.CreateLoad(Ty: B.getInt8Ty(), Ptr: Str1P, Name: "strcmpload"),
675 DestTy: CI->getType());
676
677 uint64_t Len1 = GetStringLength(V: Str1P);
678 if (Len1)
679 annotateDereferenceableBytes(CI, ArgNos: 0, DereferenceableBytes: Len1);
680 uint64_t Len2 = GetStringLength(V: Str2P);
681 if (Len2)
682 annotateDereferenceableBytes(CI, ArgNos: 1, DereferenceableBytes: Len2);
683
684 // strncmp to memcmp
685 if (!HasStr1 && HasStr2) {
686 Len2 = std::min(a: Len2, b: Length);
687 if (canTransformToMemCmp(CI, Str: Str1P, Len: Len2, SQ: DL))
688 return copyFlags(Old: *CI, New: emitMemCmp(Ptr1: Str1P, Ptr2: Str2P,
689 Len: TLI->getAsSizeT(V: Len2, M: *CI->getModule()),
690 B, DL, TLI));
691 } else if (HasStr1 && !HasStr2) {
692 Len1 = std::min(a: Len1, b: Length);
693 if (canTransformToMemCmp(CI, Str: Str2P, Len: Len1, SQ: DL))
694 return copyFlags(Old: *CI, New: emitMemCmp(Ptr1: Str1P, Ptr2: Str2P,
695 Len: TLI->getAsSizeT(V: Len1, M: *CI->getModule()),
696 B, DL, TLI));
697 }
698
699 return nullptr;
700}
701
702Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilderBase &B) {
703 Value *Src = CI->getArgOperand(i: 0);
704 ConstantInt *Size = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 1));
705 uint64_t SrcLen = GetStringLength(V: Src);
706 if (SrcLen && Size) {
707 annotateDereferenceableBytes(CI, ArgNos: 0, DereferenceableBytes: SrcLen);
708 if (SrcLen <= Size->getZExtValue() + 1)
709 return copyFlags(Old: *CI, New: emitStrDup(Ptr: Src, B, TLI));
710 }
711
712 return nullptr;
713}
714
715Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
716 Value *Dst = CI->getArgOperand(i: 0), *Src = CI->getArgOperand(i: 1);
717 if (Dst == Src) // strcpy(x,x) -> x
718 return Src;
719
720 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: {0, 1});
721 // See if we can get the length of the input string.
722 uint64_t Len = GetStringLength(V: Src);
723 if (Len)
724 annotateDereferenceableBytes(CI, ArgNos: 1, DereferenceableBytes: Len);
725 else
726 return nullptr;
727
728 // We have enough information to now generate the memcpy call to do the
729 // copy for us. Make a memcpy to copy the nul byte with align = 1.
730 CallInst *NewCI = B.CreateMemCpy(Dst, DstAlign: Align(1), Src, SrcAlign: Align(1),
731 Size: TLI->getAsSizeT(V: Len, M: *CI->getModule()));
732 mergeAttributesAndFlags(NewCI, Old: *CI);
733 return Dst;
734}
735
736Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
737 Value *Dst = CI->getArgOperand(i: 0), *Src = CI->getArgOperand(i: 1);
738
739 // stpcpy(d,s) -> strcpy(d,s) if the result is not used.
740 if (CI->use_empty())
741 return copyFlags(Old: *CI, New: emitStrCpy(Dst, Src, B, TLI));
742
743 if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
744 Value *StrLen = emitStrLen(Ptr: Src, B, DL, TLI);
745 return StrLen ? B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: Dst, IdxList: StrLen) : nullptr;
746 }
747
748 // See if we can get the length of the input string.
749 uint64_t Len = GetStringLength(V: Src);
750 if (Len)
751 annotateDereferenceableBytes(CI, ArgNos: 1, DereferenceableBytes: Len);
752 else
753 return nullptr;
754
755 Value *LenV = TLI->getAsSizeT(V: Len, M: *CI->getModule());
756 Value *DstEnd = B.CreateInBoundsGEP(
757 Ty: B.getInt8Ty(), Ptr: Dst, IdxList: TLI->getAsSizeT(V: Len - 1, M: *CI->getModule()));
758
759 // We have enough information to now generate the memcpy call to do the
760 // copy for us. Make a memcpy to copy the nul byte with align = 1.
761 CallInst *NewCI = B.CreateMemCpy(Dst, DstAlign: Align(1), Src, SrcAlign: Align(1), Size: LenV);
762 mergeAttributesAndFlags(NewCI, Old: *CI);
763 return DstEnd;
764}
765
766// Optimize a call to size_t strlcpy(char*, const char*, size_t).
767
768Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
769 Value *Size = CI->getArgOperand(i: 2);
770 if (isKnownNonZero(V: Size, Q: DL))
771 // Like snprintf, the function stores into the destination only when
772 // the size argument is nonzero.
773 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
774 // The function reads the source argument regardless of Size (it returns
775 // its length).
776 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 1);
777
778 uint64_t NBytes;
779 if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Val: Size))
780 NBytes = SizeC->getZExtValue();
781 else
782 return nullptr;
783
784 Value *Dst = CI->getArgOperand(i: 0);
785 Value *Src = CI->getArgOperand(i: 1);
786 if (NBytes <= 1) {
787 if (NBytes == 1)
788 // For a call to strlcpy(D, S, 1) first store a nul in *D.
789 B.CreateStore(Val: B.getInt8(C: 0), Ptr: Dst);
790
791 // Transform strlcpy(D, S, 0) to a call to strlen(S).
792 return copyFlags(Old: *CI, New: emitStrLen(Ptr: Src, B, DL, TLI));
793 }
794
795 // Try to determine the length of the source, substituting its size
796 // when it's not nul-terminated (as it's required to be) to avoid
797 // reading past its end.
798 StringRef Str;
799 if (!getConstantStringInfo(V: Src, Str, /*TrimAtNul=*/false))
800 return nullptr;
801
802 uint64_t SrcLen = Str.find(C: '\0');
803 // Set if the terminating nul should be copied by the call to memcpy
804 // below.
805 bool NulTerm = SrcLen < NBytes;
806
807 if (NulTerm)
808 // Overwrite NBytes with the number of bytes to copy, including
809 // the terminating nul.
810 NBytes = SrcLen + 1;
811 else {
812 // Set the length of the source for the function to return to its
813 // size, and cap NBytes at the same.
814 SrcLen = std::min(a: SrcLen, b: uint64_t(Str.size()));
815 NBytes = std::min(a: NBytes - 1, b: SrcLen);
816 }
817
818 if (SrcLen == 0) {
819 // Transform strlcpy(D, "", N) to (*D = '\0, 0).
820 B.CreateStore(Val: B.getInt8(C: 0), Ptr: Dst);
821 return ConstantInt::get(Ty: CI->getType(), V: 0);
822 }
823
824 // Transform strlcpy(D, S, N) to memcpy(D, S, N') where N' is the lower
825 // bound on strlen(S) + 1 and N, optionally followed by a nul store to
826 // D[N' - 1] if necessary.
827 CallInst *NewCI = B.CreateMemCpy(Dst, DstAlign: Align(1), Src, SrcAlign: Align(1),
828 Size: TLI->getAsSizeT(V: NBytes, M: *CI->getModule()));
829 mergeAttributesAndFlags(NewCI, Old: *CI);
830
831 if (!NulTerm) {
832 Value *EndOff = ConstantInt::get(Ty: CI->getType(), V: NBytes);
833 Value *EndPtr = B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: Dst, IdxList: EndOff);
834 B.CreateStore(Val: B.getInt8(C: 0), Ptr: EndPtr);
835 }
836
837 // Like snprintf, strlcpy returns the number of nonzero bytes that would
838 // have been copied if the bound had been sufficiently big (which in this
839 // case is strlen(Src)).
840 return ConstantInt::get(Ty: CI->getType(), V: SrcLen);
841}
842
843// Optimize a call CI to either stpncpy when RetEnd is true, or to strncpy
844// otherwise.
845Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
846 IRBuilderBase &B) {
847 Value *Dst = CI->getArgOperand(i: 0);
848 Value *Src = CI->getArgOperand(i: 1);
849 Value *Size = CI->getArgOperand(i: 2);
850
851 if (isKnownNonZero(V: Size, Q: DL)) {
852 // Both st{p,r}ncpy(D, S, N) access the source and destination arrays
853 // only when N is nonzero.
854 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
855 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 1);
856 }
857
858 // If the "bound" argument is known set N to it. Otherwise set it to
859 // UINT64_MAX and handle it later.
860 uint64_t N = UINT64_MAX;
861 if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Val: Size))
862 N = SizeC->getZExtValue();
863
864 if (N == 0)
865 // Fold st{p,r}ncpy(D, S, 0) to D.
866 return Dst;
867
868 if (N == 1) {
869 Type *CharTy = B.getInt8Ty();
870 Value *CharVal = B.CreateLoad(Ty: CharTy, Ptr: Src, Name: "stxncpy.char0");
871 B.CreateStore(Val: CharVal, Ptr: Dst);
872 if (!RetEnd)
873 // Transform strncpy(D, S, 1) to return (*D = *S), D.
874 return Dst;
875
876 // Transform stpncpy(D, S, 1) to return (*D = *S) ? D + 1 : D.
877 Value *ZeroChar = ConstantInt::get(Ty: CharTy, V: 0);
878 Value *Cmp = B.CreateICmpEQ(LHS: CharVal, RHS: ZeroChar, Name: "stpncpy.char0cmp");
879
880 Value *Off1 = B.getInt32(C: 1);
881 Value *EndPtr = B.CreateInBoundsGEP(Ty: CharTy, Ptr: Dst, IdxList: Off1, Name: "stpncpy.end");
882 return B.CreateSelect(C: Cmp, True: Dst, False: EndPtr, Name: "stpncpy.sel");
883 }
884
885 // If the length of the input string is known set SrcLen to it.
886 uint64_t SrcLen = GetStringLength(V: Src);
887 if (SrcLen)
888 annotateDereferenceableBytes(CI, ArgNos: 1, DereferenceableBytes: SrcLen);
889 else
890 return nullptr;
891
892 --SrcLen; // Unbias length.
893
894 if (SrcLen == 0) {
895 // Transform st{p,r}ncpy(D, "", N) to memset(D, '\0', N) for any N.
896 Align MemSetAlign =
897 CI->getAttributes().getParamAttrs(ArgNo: 0).getAlignment().valueOrOne();
898 CallInst *NewCI = B.CreateMemSet(Ptr: Dst, Val: B.getInt8(C: '\0'), Size, Align: MemSetAlign);
899 AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(ArgNo: 0));
900 NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
901 C&: CI->getContext(), ArgNo: 0, B: ArgAttrs));
902 copyFlags(Old: *CI, New: NewCI);
903 return Dst;
904 }
905
906 if (N > SrcLen + 1) {
907 if (N > 128)
908 // Bail if N is large or unknown.
909 return nullptr;
910
911 // st{p,r}ncpy(D, "a", N) -> memcpy(D, "a\0\0\0", N) for N <= 128.
912 StringRef Str;
913 if (!getConstantStringInfo(V: Src, Str))
914 return nullptr;
915 std::string SrcStr = Str.str();
916 // Create a bigger, nul-padded array with the same length, SrcLen,
917 // as the original string.
918 SrcStr.resize(n: N, c: '\0');
919 Src = B.CreateGlobalString(Str: SrcStr, Name: "str", /*AddressSpace=*/0,
920 /*M=*/nullptr, /*AddNull=*/false);
921 }
922
923 // st{p,r}ncpy(D, S, N) -> memcpy(align 1 D, align 1 S, N) when both
924 // S and N are constant.
925 CallInst *NewCI = B.CreateMemCpy(Dst, DstAlign: Align(1), Src, SrcAlign: Align(1),
926 Size: TLI->getAsSizeT(V: N, M: *CI->getModule()));
927 mergeAttributesAndFlags(NewCI, Old: *CI);
928 if (!RetEnd)
929 return Dst;
930
931 // stpncpy(D, S, N) returns the address of the first null in D if it writes
932 // one, otherwise D + N.
933 Value *Off = B.getInt64(C: std::min(a: SrcLen, b: N));
934 return B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: Dst, IdxList: Off, Name: "endptr");
935}
936
937Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
938 unsigned CharSize,
939 Value *Bound) {
940 Value *Src = CI->getArgOperand(i: 0);
941 Type *CharTy = B.getIntNTy(N: CharSize);
942
943 if (isOnlyUsedInZeroEqualityComparison(CxtI: CI) &&
944 (!Bound || isKnownNonZero(V: Bound, Q: DL))) {
945 // Fold strlen:
946 // strlen(x) != 0 --> *x != 0
947 // strlen(x) == 0 --> *x == 0
948 // and likewise strnlen with constant N > 0:
949 // strnlen(x, N) != 0 --> *x != 0
950 // strnlen(x, N) == 0 --> *x == 0
951 return B.CreateZExt(V: B.CreateLoad(Ty: CharTy, Ptr: Src, Name: "char0"),
952 DestTy: CI->getType());
953 }
954
955 if (Bound) {
956 if (ConstantInt *BoundCst = dyn_cast<ConstantInt>(Val: Bound)) {
957 if (BoundCst->isZero())
958 // Fold strnlen(s, 0) -> 0 for any s, constant or otherwise.
959 return ConstantInt::get(Ty: CI->getType(), V: 0);
960
961 if (BoundCst->isOne()) {
962 // Fold strnlen(s, 1) -> *s ? 1 : 0 for any s.
963 Value *CharVal = B.CreateLoad(Ty: CharTy, Ptr: Src, Name: "strnlen.char0");
964 Value *ZeroChar = ConstantInt::get(Ty: CharTy, V: 0);
965 Value *Cmp = B.CreateICmpNE(LHS: CharVal, RHS: ZeroChar, Name: "strnlen.char0cmp");
966 return B.CreateZExt(V: Cmp, DestTy: CI->getType());
967 }
968 }
969 }
970
971 if (uint64_t Len = GetStringLength(V: Src, CharSize)) {
972 Value *LenC = ConstantInt::get(Ty: CI->getType(), V: Len - 1);
973 // Fold strlen("xyz") -> 3 and strnlen("xyz", 2) -> 2
974 // and strnlen("xyz", Bound) -> min(3, Bound) for nonconstant Bound.
975 if (Bound)
976 return B.CreateBinaryIntrinsic(ID: Intrinsic::umin, LHS: LenC, RHS: Bound);
977 return LenC;
978 }
979
980 if (Bound)
981 // Punt for strnlen for now.
982 return nullptr;
983
984 // If s is a constant pointer pointing to a string literal, we can fold
985 // strlen(s + x) to strlen(s) - x, when x is known to be in the range
986 // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
987 // We only try to simplify strlen when the pointer s points to an array
988 // of CharSize elements. Otherwise, we would need to scale the offset x before
989 // doing the subtraction. This will make the optimization more complex, and
990 // it's not very useful because calling strlen for a pointer of other types is
991 // very uncommon.
992 if (GEPOperator *GEP = dyn_cast<GEPOperator>(Val: Src)) {
993 unsigned BW = DL.getIndexTypeSizeInBits(Ty: GEP->getType());
994 SmallMapVector<Value *, APInt, 4> VarOffsets;
995 APInt ConstOffset(BW, 0);
996 assert(CharSize % 8 == 0 && "Expected a multiple of 8 sized CharSize");
997 // Check the gep is a single variable offset.
998 if (!GEP->collectOffset(DL, BitWidth: BW, VariableOffsets&: VarOffsets, ConstantOffset&: ConstOffset) ||
999 VarOffsets.size() != 1 || ConstOffset != 0 ||
1000 VarOffsets.begin()->second != CharSize / 8)
1001 return nullptr;
1002
1003 ConstantDataArraySlice Slice;
1004 if (getConstantDataArrayInfo(V: GEP->getOperand(i_nocapture: 0), Slice, ElementSize: CharSize)) {
1005 uint64_t NullTermIdx;
1006 if (Slice.Array == nullptr) {
1007 NullTermIdx = 0;
1008 } else {
1009 NullTermIdx = ~((uint64_t)0);
1010 for (uint64_t I = 0, E = Slice.Length; I < E; ++I) {
1011 if (Slice.Array->getElementAsInteger(i: I + Slice.Offset) == 0) {
1012 NullTermIdx = I;
1013 break;
1014 }
1015 }
1016 // If the string does not have '\0', leave it to strlen to compute
1017 // its length.
1018 if (NullTermIdx == ~((uint64_t)0))
1019 return nullptr;
1020 }
1021
1022 Value *Offset = VarOffsets.begin()->first;
1023 KnownBits Known = computeKnownBits(V: Offset, DL, AC: nullptr, CxtI: CI, DT: nullptr);
1024
1025 // If Offset is not provably in the range [0, NullTermIdx], we can still
1026 // optimize if we can prove that the program has undefined behavior when
1027 // Offset is outside that range. That is the case when GEP->getOperand(0)
1028 // is a pointer to an object whose memory extent is NullTermIdx+1.
1029 if ((Known.isNonNegative() && Known.getMaxValue().ule(RHS: NullTermIdx)) ||
1030 (isa<GlobalVariable>(Val: GEP->getOperand(i_nocapture: 0)) &&
1031 NullTermIdx == Slice.Length - 1)) {
1032 Offset = B.CreateSExtOrTrunc(V: Offset, DestTy: CI->getType());
1033 return B.CreateSub(LHS: ConstantInt::get(Ty: CI->getType(), V: NullTermIdx),
1034 RHS: Offset);
1035 }
1036 }
1037 }
1038
1039 // strlen(x?"foo":"bars") --> x ? 3 : 4
1040 if (SelectInst *SI = dyn_cast<SelectInst>(Val: Src)) {
1041 uint64_t LenTrue = GetStringLength(V: SI->getTrueValue(), CharSize);
1042 uint64_t LenFalse = GetStringLength(V: SI->getFalseValue(), CharSize);
1043 if (LenTrue && LenFalse) {
1044 ORE.emit(RemarkBuilder: [&]() {
1045 return OptimizationRemark("instcombine", "simplify-libcalls", CI)
1046 << "folded strlen(select) to select of constants";
1047 });
1048 return B.CreateSelect(C: SI->getCondition(),
1049 True: ConstantInt::get(Ty: CI->getType(), V: LenTrue - 1),
1050 False: ConstantInt::get(Ty: CI->getType(), V: LenFalse - 1));
1051 }
1052 }
1053
1054 return nullptr;
1055}
1056
1057Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilderBase &B) {
1058 if (Value *V = optimizeStringLength(CI, B, CharSize: 8))
1059 return V;
1060 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
1061 return nullptr;
1062}
1063
1064Value *LibCallSimplifier::optimizeStrNLen(CallInst *CI, IRBuilderBase &B) {
1065 Value *Bound = CI->getArgOperand(i: 1);
1066 if (Value *V = optimizeStringLength(CI, B, CharSize: 8, Bound))
1067 return V;
1068
1069 if (isKnownNonZero(V: Bound, Q: DL))
1070 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
1071 return nullptr;
1072}
1073
1074Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilderBase &B) {
1075 Module &M = *CI->getModule();
1076 unsigned WCharSize = TLI->getWCharSize(M) * 8;
1077 // We cannot perform this optimization without wchar_size metadata.
1078 if (WCharSize == 0)
1079 return nullptr;
1080
1081 return optimizeStringLength(CI, B, CharSize: WCharSize);
1082}
1083
1084Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilderBase &B) {
1085 StringRef S1, S2;
1086 bool HasS1 = getConstantStringInfo(V: CI->getArgOperand(i: 0), Str&: S1);
1087 bool HasS2 = getConstantStringInfo(V: CI->getArgOperand(i: 1), Str&: S2);
1088
1089 // strpbrk(s, "") -> nullptr
1090 // strpbrk("", s) -> nullptr
1091 if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
1092 return Constant::getNullValue(Ty: CI->getType());
1093
1094 // Constant folding.
1095 if (HasS1 && HasS2) {
1096 size_t I = S1.find_first_of(Chars: S2);
1097 if (I == StringRef::npos) // No match.
1098 return Constant::getNullValue(Ty: CI->getType());
1099
1100 return B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: CI->getArgOperand(i: 0),
1101 IdxList: B.getInt64(C: I), Name: "strpbrk");
1102 }
1103
1104 // strpbrk(s, "a") -> strchr(s, 'a')
1105 if (HasS2 && S2.size() == 1)
1106 return copyFlags(Old: *CI, New: emitStrChr(Ptr: CI->getArgOperand(i: 0), C: S2[0], B, TLI));
1107
1108 return nullptr;
1109}
1110
1111Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilderBase &B) {
1112 Value *EndPtr = CI->getArgOperand(i: 1);
1113 if (isa<ConstantPointerNull>(Val: EndPtr)) {
1114 // With a null EndPtr, this function won't capture the main argument.
1115 // It would be readonly too, except that it still may write to errno.
1116 CI->addParamAttr(ArgNo: 0, Attr: Attribute::getWithCaptureInfo(Context&: CI->getContext(),
1117 CI: CaptureInfo::none()));
1118 }
1119
1120 return nullptr;
1121}
1122
1123Value *LibCallSimplifier::optimizeStrSpn(CallInst *CI, IRBuilderBase &B) {
1124 StringRef S1, S2;
1125 bool HasS1 = getConstantStringInfo(V: CI->getArgOperand(i: 0), Str&: S1);
1126 bool HasS2 = getConstantStringInfo(V: CI->getArgOperand(i: 1), Str&: S2);
1127
1128 // strspn(s, "") -> 0
1129 // strspn("", s) -> 0
1130 if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
1131 return Constant::getNullValue(Ty: CI->getType());
1132
1133 // Constant folding.
1134 if (HasS1 && HasS2) {
1135 size_t Pos = S1.find_first_not_of(Chars: S2);
1136 if (Pos == StringRef::npos)
1137 Pos = S1.size();
1138 return ConstantInt::get(Ty: CI->getType(), V: Pos);
1139 }
1140
1141 return nullptr;
1142}
1143
1144Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilderBase &B) {
1145 StringRef S1, S2;
1146 bool HasS1 = getConstantStringInfo(V: CI->getArgOperand(i: 0), Str&: S1);
1147 bool HasS2 = getConstantStringInfo(V: CI->getArgOperand(i: 1), Str&: S2);
1148
1149 // strcspn("", s) -> 0
1150 if (HasS1 && S1.empty())
1151 return Constant::getNullValue(Ty: CI->getType());
1152
1153 // Constant folding.
1154 if (HasS1 && HasS2) {
1155 size_t Pos = S1.find_first_of(Chars: S2);
1156 if (Pos == StringRef::npos)
1157 Pos = S1.size();
1158 return ConstantInt::get(Ty: CI->getType(), V: Pos);
1159 }
1160
1161 // strcspn(s, "") -> strlen(s)
1162 if (HasS2 && S2.empty())
1163 return copyFlags(Old: *CI, New: emitStrLen(Ptr: CI->getArgOperand(i: 0), B, DL, TLI));
1164
1165 return nullptr;
1166}
1167
1168Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilderBase &B) {
1169 // fold strstr(x, x) -> x.
1170 if (CI->getArgOperand(i: 0) == CI->getArgOperand(i: 1))
1171 return CI->getArgOperand(i: 0);
1172
1173 // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
1174 if (isOnlyUsedInEqualityComparison(V: CI, With: CI->getArgOperand(i: 0))) {
1175 Value *StrLen = emitStrLen(Ptr: CI->getArgOperand(i: 1), B, DL, TLI);
1176 if (!StrLen)
1177 return nullptr;
1178 Value *StrNCmp = emitStrNCmp(Ptr1: CI->getArgOperand(i: 0), Ptr2: CI->getArgOperand(i: 1),
1179 Len: StrLen, B, DL, TLI);
1180 if (!StrNCmp)
1181 return nullptr;
1182 for (User *U : llvm::make_early_inc_range(Range: CI->users())) {
1183 ICmpInst *Old = cast<ICmpInst>(Val: U);
1184 Value *Cmp =
1185 B.CreateICmp(P: Old->getPredicate(), LHS: StrNCmp,
1186 RHS: ConstantInt::getNullValue(Ty: StrNCmp->getType()), Name: "cmp");
1187 replaceAllUsesWith(I: Old, With: Cmp);
1188 }
1189 return CI;
1190 }
1191
1192 // See if either input string is a constant string.
1193 StringRef SearchStr, ToFindStr;
1194 bool HasStr1 = getConstantStringInfo(V: CI->getArgOperand(i: 0), Str&: SearchStr);
1195 bool HasStr2 = getConstantStringInfo(V: CI->getArgOperand(i: 1), Str&: ToFindStr);
1196
1197 // fold strstr(x, "") -> x.
1198 if (HasStr2 && ToFindStr.empty())
1199 return CI->getArgOperand(i: 0);
1200
1201 // If both strings are known, constant fold it.
1202 if (HasStr1 && HasStr2) {
1203 size_t Offset = SearchStr.find(Str: ToFindStr);
1204
1205 if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
1206 return Constant::getNullValue(Ty: CI->getType());
1207
1208 // strstr("abcd", "bc") -> gep((char*)"abcd", 1)
1209 return B.CreateConstInBoundsGEP1_64(Ty: B.getInt8Ty(), Ptr: CI->getArgOperand(i: 0),
1210 Idx0: Offset, Name: "strstr");
1211 }
1212
1213 // fold strstr(x, "y") -> strchr(x, 'y').
1214 if (HasStr2 && ToFindStr.size() == 1) {
1215 return emitStrChr(Ptr: CI->getArgOperand(i: 0), C: ToFindStr[0], B, TLI);
1216 }
1217
1218 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: {0, 1});
1219 return nullptr;
1220}
1221
1222Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
1223 Value *SrcStr = CI->getArgOperand(i: 0);
1224 Value *Size = CI->getArgOperand(i: 2);
1225 annotateNonNullAndDereferenceable(CI, ArgNos: 0, Size, DL);
1226 Value *CharVal = CI->getArgOperand(i: 1);
1227 ConstantInt *LenC = dyn_cast<ConstantInt>(Val: Size);
1228 Value *NullPtr = Constant::getNullValue(Ty: CI->getType());
1229
1230 if (LenC) {
1231 if (LenC->isZero())
1232 // Fold memrchr(x, y, 0) --> null.
1233 return NullPtr;
1234
1235 if (LenC->isOne()) {
1236 // Fold memrchr(x, y, 1) --> *x == y ? x : null for any x and y,
1237 // constant or otherwise.
1238 Value *Val = B.CreateLoad(Ty: B.getInt8Ty(), Ptr: SrcStr, Name: "memrchr.char0");
1239 // Slice off the character's high end bits.
1240 CharVal = B.CreateTrunc(V: CharVal, DestTy: B.getInt8Ty());
1241 Value *Cmp = B.CreateICmpEQ(LHS: Val, RHS: CharVal, Name: "memrchr.char0cmp");
1242 return B.CreateSelect(C: Cmp, True: SrcStr, False: NullPtr, Name: "memrchr.sel");
1243 }
1244 }
1245
1246 StringRef Str;
1247 if (!getConstantStringInfo(V: SrcStr, Str, /*TrimAtNul=*/false))
1248 return nullptr;
1249
1250 if (Str.size() == 0)
1251 // If the array is empty fold memrchr(A, C, N) to null for any value
1252 // of C and N on the basis that the only valid value of N is zero
1253 // (otherwise the call is undefined).
1254 return NullPtr;
1255
1256 uint64_t EndOff = UINT64_MAX;
1257 if (LenC) {
1258 EndOff = LenC->getZExtValue();
1259 if (Str.size() < EndOff)
1260 // Punt out-of-bounds accesses to sanitizers and/or libc.
1261 return nullptr;
1262 }
1263
1264 if (ConstantInt *CharC = dyn_cast<ConstantInt>(Val: CharVal)) {
1265 // Fold memrchr(S, C, N) for a constant C.
1266 size_t Pos = Str.rfind(C: CharC->getZExtValue(), From: EndOff);
1267 if (Pos == StringRef::npos)
1268 // When the character is not in the source array fold the result
1269 // to null regardless of Size.
1270 return NullPtr;
1271
1272 if (LenC)
1273 // Fold memrchr(s, c, N) --> s + Pos for constant N > Pos.
1274 return B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: SrcStr, IdxList: B.getInt64(C: Pos));
1275
1276 if (Str.find(C: Str[Pos]) == Pos) {
1277 // When there is just a single occurrence of C in S, i.e., the one
1278 // in Str[Pos], fold
1279 // memrchr(s, c, N) --> N <= Pos ? null : s + Pos
1280 // for nonconstant N.
1281 Value *Cmp = B.CreateICmpULE(LHS: Size, RHS: ConstantInt::get(Ty: Size->getType(), V: Pos),
1282 Name: "memrchr.cmp");
1283 Value *SrcPlus = B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: SrcStr,
1284 IdxList: B.getInt64(C: Pos), Name: "memrchr.ptr_plus");
1285 return B.CreateSelect(C: Cmp, True: NullPtr, False: SrcPlus, Name: "memrchr.sel");
1286 }
1287 }
1288
1289 // Truncate the string to search at most EndOff characters.
1290 Str = Str.substr(Start: 0, N: EndOff);
1291 if (Str.find_first_not_of(C: Str[0]) != StringRef::npos)
1292 return nullptr;
1293
1294 // If the source array consists of all equal characters, then for any
1295 // C and N (whether in bounds or not), fold memrchr(S, C, N) to
1296 // N != 0 && *S == C ? S + N - 1 : null
1297 Type *SizeTy = Size->getType();
1298 Type *Int8Ty = B.getInt8Ty();
1299 Value *NNeZ = B.CreateICmpNE(LHS: Size, RHS: ConstantInt::get(Ty: SizeTy, V: 0));
1300 // Slice off the sought character's high end bits.
1301 CharVal = B.CreateTrunc(V: CharVal, DestTy: Int8Ty);
1302 Value *CEqS0 = B.CreateICmpEQ(LHS: ConstantInt::get(Ty: Int8Ty, V: Str[0]), RHS: CharVal);
1303 Value *And = B.CreateLogicalAnd(Cond1: NNeZ, Cond2: CEqS0);
1304 Value *SizeM1 = B.CreateSub(LHS: Size, RHS: ConstantInt::get(Ty: SizeTy, V: 1));
1305 Value *SrcPlus =
1306 B.CreateInBoundsGEP(Ty: Int8Ty, Ptr: SrcStr, IdxList: SizeM1, Name: "memrchr.ptr_plus");
1307 return B.CreateSelect(C: And, True: SrcPlus, False: NullPtr, Name: "memrchr.sel");
1308}
1309
1310Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
1311 Value *SrcStr = CI->getArgOperand(i: 0);
1312 Value *Size = CI->getArgOperand(i: 2);
1313
1314 if (isKnownNonZero(V: Size, Q: DL)) {
1315 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
1316 if (isOnlyUsedInEqualityComparison(V: CI, With: SrcStr))
1317 return memChrToCharCompare(CI, NBytes: Size, B, DL);
1318 }
1319
1320 Value *CharVal = CI->getArgOperand(i: 1);
1321 ConstantInt *CharC = dyn_cast<ConstantInt>(Val: CharVal);
1322 ConstantInt *LenC = dyn_cast<ConstantInt>(Val: Size);
1323 Value *NullPtr = Constant::getNullValue(Ty: CI->getType());
1324
1325 // memchr(x, y, 0) -> null
1326 if (LenC) {
1327 if (LenC->isZero())
1328 return NullPtr;
1329
1330 if (LenC->isOne()) {
1331 // Fold memchr(x, y, 1) --> *x == y ? x : null for any x and y,
1332 // constant or otherwise.
1333 Value *Val = B.CreateLoad(Ty: B.getInt8Ty(), Ptr: SrcStr, Name: "memchr.char0");
1334 // Slice off the character's high end bits.
1335 CharVal = B.CreateTrunc(V: CharVal, DestTy: B.getInt8Ty());
1336 Value *Cmp = B.CreateICmpEQ(LHS: Val, RHS: CharVal, Name: "memchr.char0cmp");
1337 return B.CreateSelect(C: Cmp, True: SrcStr, False: NullPtr, Name: "memchr.sel");
1338 }
1339 }
1340
1341 StringRef Str;
1342 if (!getConstantStringInfo(V: SrcStr, Str, /*TrimAtNul=*/false))
1343 return nullptr;
1344
1345 if (CharC) {
1346 size_t Pos = Str.find(C: CharC->getZExtValue());
1347 if (Pos == StringRef::npos)
1348 // When the character is not in the source array fold the result
1349 // to null regardless of Size.
1350 return NullPtr;
1351
1352 // Fold memchr(s, c, n) -> n <= Pos ? null : s + Pos
1353 // When the constant Size is less than or equal to the character
1354 // position also fold the result to null.
1355 Value *Cmp = B.CreateICmpULE(LHS: Size, RHS: ConstantInt::get(Ty: Size->getType(), V: Pos),
1356 Name: "memchr.cmp");
1357 Value *SrcPlus = B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: SrcStr, IdxList: B.getInt64(C: Pos),
1358 Name: "memchr.ptr");
1359 return B.CreateSelect(C: Cmp, True: NullPtr, False: SrcPlus);
1360 }
1361
1362 if (Str.size() == 0)
1363 // If the array is empty fold memchr(A, C, N) to null for any value
1364 // of C and N on the basis that the only valid value of N is zero
1365 // (otherwise the call is undefined).
1366 return NullPtr;
1367
1368 if (LenC)
1369 Str = substr(Str, Len: LenC->getZExtValue());
1370
1371 size_t Pos = Str.find_first_not_of(C: Str[0]);
1372 if (Pos == StringRef::npos
1373 || Str.find_first_not_of(C: Str[Pos], From: Pos) == StringRef::npos) {
1374 // If the source array consists of at most two consecutive sequences
1375 // of the same characters, then for any C and N (whether in bounds or
1376 // not), fold memchr(S, C, N) to
1377 // N != 0 && *S == C ? S : null
1378 // or for the two sequences to:
1379 // N != 0 && *S == C ? S : (N > Pos && S[Pos] == C ? S + Pos : null)
1380 // ^Sel2 ^Sel1 are denoted above.
1381 // The latter makes it also possible to fold strchr() calls with strings
1382 // of the same characters.
1383 Type *SizeTy = Size->getType();
1384 Type *Int8Ty = B.getInt8Ty();
1385
1386 // Slice off the sought character's high end bits.
1387 CharVal = B.CreateTrunc(V: CharVal, DestTy: Int8Ty);
1388
1389 Value *Sel1 = NullPtr;
1390 if (Pos != StringRef::npos) {
1391 // Handle two consecutive sequences of the same characters.
1392 Value *PosVal = ConstantInt::get(Ty: SizeTy, V: Pos);
1393 Value *StrPos = ConstantInt::get(Ty: Int8Ty, V: Str[Pos]);
1394 Value *CEqSPos = B.CreateICmpEQ(LHS: CharVal, RHS: StrPos);
1395 Value *NGtPos = B.CreateICmp(P: ICmpInst::ICMP_UGT, LHS: Size, RHS: PosVal);
1396 Value *And = B.CreateAnd(LHS: CEqSPos, RHS: NGtPos);
1397 Value *SrcPlus = B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: SrcStr, IdxList: PosVal);
1398 Sel1 = B.CreateSelect(C: And, True: SrcPlus, False: NullPtr, Name: "memchr.sel1");
1399 }
1400
1401 Value *Str0 = ConstantInt::get(Ty: Int8Ty, V: Str[0]);
1402 Value *CEqS0 = B.CreateICmpEQ(LHS: Str0, RHS: CharVal);
1403 Value *NNeZ = B.CreateICmpNE(LHS: Size, RHS: ConstantInt::get(Ty: SizeTy, V: 0));
1404 Value *And = B.CreateAnd(LHS: NNeZ, RHS: CEqS0);
1405 return B.CreateSelect(C: And, True: SrcStr, False: Sel1, Name: "memchr.sel2");
1406 }
1407
1408 if (!LenC) {
1409 if (isOnlyUsedInEqualityComparison(V: CI, With: SrcStr))
1410 // S is dereferenceable so it's safe to load from it and fold
1411 // memchr(S, C, N) == S to N && *S == C for any C and N.
1412 // TODO: This is safe even for nonconstant S.
1413 return memChrToCharCompare(CI, NBytes: Size, B, DL);
1414
1415 // From now on we need a constant length and constant array.
1416 return nullptr;
1417 }
1418
1419 bool OptForSize = llvm::shouldOptimizeForSize(BB: CI->getParent(), PSI, BFI,
1420 QueryType: PGSOQueryType::IRPass);
1421
1422 // If the char is variable but the input str and length are not we can turn
1423 // this memchr call into a simple bit field test. Of course this only works
1424 // when the return value is only checked against null.
1425 //
1426 // It would be really nice to reuse switch lowering here but we can't change
1427 // the CFG at this point.
1428 //
1429 // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
1430 // != 0
1431 // after bounds check.
1432 if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CxtI: CI))
1433 return nullptr;
1434
1435 unsigned char Max =
1436 *std::max_element(first: reinterpret_cast<const unsigned char *>(Str.begin()),
1437 last: reinterpret_cast<const unsigned char *>(Str.end()));
1438
1439 // Make sure the bit field we're about to create fits in a register on the
1440 // target.
1441 // FIXME: On a 64 bit architecture this prevents us from using the
1442 // interesting range of alpha ascii chars. We could do better by emitting
1443 // two bitfields or shifting the range by 64 if no lower chars are used.
1444 if (!DL.fitsInLegalInteger(Width: Max + 1)) {
1445 // Build chain of ORs
1446 // Transform:
1447 // memchr("abcd", C, 4) != nullptr
1448 // to:
1449 // (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0
1450 std::string SortedStr = Str.str();
1451 llvm::sort(C&: SortedStr);
1452 // Compute the number of of non-contiguous ranges.
1453 unsigned NonContRanges = 1;
1454 for (size_t i = 1; i < SortedStr.size(); ++i) {
1455 if (SortedStr[i] > SortedStr[i - 1] + 1) {
1456 NonContRanges++;
1457 }
1458 }
1459
1460 // Restrict this optimization to profitable cases with one or two range
1461 // checks.
1462 if (NonContRanges > 2)
1463 return nullptr;
1464
1465 // Slice off the character's high end bits.
1466 CharVal = B.CreateTrunc(V: CharVal, DestTy: B.getInt8Ty());
1467
1468 SmallVector<Value *> CharCompares;
1469 for (unsigned char C : SortedStr)
1470 CharCompares.push_back(Elt: B.CreateICmpEQ(LHS: CharVal, RHS: B.getInt8(C)));
1471
1472 return B.CreateIntToPtr(V: B.CreateOr(Ops: CharCompares), DestTy: CI->getType());
1473 }
1474
1475 // For the bit field use a power-of-2 type with at least 8 bits to avoid
1476 // creating unnecessary illegal types.
1477 unsigned char Width = NextPowerOf2(A: std::max(a: (unsigned char)7, b: Max));
1478
1479 // Now build the bit field.
1480 APInt Bitfield(Width, 0);
1481 for (char C : Str)
1482 Bitfield.setBit((unsigned char)C);
1483 Value *BitfieldC = B.getInt(AI: Bitfield);
1484
1485 // Adjust width of "C" to the bitfield width, then mask off the high bits.
1486 Value *C = B.CreateZExtOrTrunc(V: CharVal, DestTy: BitfieldC->getType());
1487 C = B.CreateAnd(LHS: C, RHS: B.getIntN(N: Width, C: 0xFF));
1488
1489 // First check that the bit field access is within bounds.
1490 Value *Bounds = B.CreateICmp(P: ICmpInst::ICMP_ULT, LHS: C, RHS: B.getIntN(N: Width, C: Width),
1491 Name: "memchr.bounds");
1492
1493 // Create code that checks if the given bit is set in the field.
1494 Value *Shl = B.CreateShl(LHS: B.getIntN(N: Width, C: 1ULL), RHS: C);
1495 Value *Bits = B.CreateIsNotNull(Arg: B.CreateAnd(LHS: Shl, RHS: BitfieldC), Name: "memchr.bits");
1496
1497 // Finally merge both checks and cast to pointer type. The inttoptr
1498 // implicitly zexts the i1 to intptr type.
1499 return B.CreateIntToPtr(V: B.CreateLogicalAnd(Cond1: Bounds, Cond2: Bits, Name: "memchr"),
1500 DestTy: CI->getType());
1501}
1502
1503// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant
1504// arrays LHS and RHS and nonconstant Size.
1505static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS,
1506 Value *Size, bool StrNCmp,
1507 IRBuilderBase &B, const DataLayout &DL) {
1508 if (LHS == RHS) // memcmp(s,s,x) -> 0
1509 return Constant::getNullValue(Ty: CI->getType());
1510
1511 StringRef LStr, RStr;
1512 if (!getConstantStringInfo(V: LHS, Str&: LStr, /*TrimAtNul=*/false) ||
1513 !getConstantStringInfo(V: RHS, Str&: RStr, /*TrimAtNul=*/false))
1514 return nullptr;
1515
1516 // If the contents of both constant arrays are known, fold a call to
1517 // memcmp(A, B, N) to
1518 // N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0)
1519 // where Pos is the first mismatch between A and B, determined below.
1520
1521 uint64_t Pos = 0;
1522 Value *Zero = ConstantInt::get(Ty: CI->getType(), V: 0);
1523 for (uint64_t MinSize = std::min(a: LStr.size(), b: RStr.size()); ; ++Pos) {
1524 if (Pos == MinSize ||
1525 (StrNCmp && (LStr[Pos] == '\0' && RStr[Pos] == '\0'))) {
1526 // One array is a leading part of the other of equal or greater
1527 // size, or for strncmp, the arrays are equal strings.
1528 // Fold the result to zero. Size is assumed to be in bounds, since
1529 // otherwise the call would be undefined.
1530 return Zero;
1531 }
1532
1533 if (LStr[Pos] != RStr[Pos])
1534 break;
1535 }
1536
1537 // Normalize the result.
1538 typedef unsigned char UChar;
1539 int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1;
1540 Value *MaxSize = ConstantInt::get(Ty: Size->getType(), V: Pos);
1541 Value *Cmp = B.CreateICmp(P: ICmpInst::ICMP_ULE, LHS: Size, RHS: MaxSize);
1542 Value *Res = ConstantInt::getSigned(Ty: CI->getType(), V: IRes);
1543 return B.CreateSelect(C: Cmp, True: Zero, False: Res);
1544}
1545
1546// Optimize a memcmp call CI with constant size Len.
1547static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
1548 uint64_t Len, IRBuilderBase &B,
1549 const DataLayout &DL) {
1550 if (Len == 0) // memcmp(s1,s2,0) -> 0
1551 return Constant::getNullValue(Ty: CI->getType());
1552
1553 // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
1554 if (Len == 1) {
1555 Value *LHSV = B.CreateZExt(V: B.CreateLoad(Ty: B.getInt8Ty(), Ptr: LHS, Name: "lhsc"),
1556 DestTy: CI->getType(), Name: "lhsv");
1557 Value *RHSV = B.CreateZExt(V: B.CreateLoad(Ty: B.getInt8Ty(), Ptr: RHS, Name: "rhsc"),
1558 DestTy: CI->getType(), Name: "rhsv");
1559 return B.CreateSub(LHS: LHSV, RHS: RHSV, Name: "chardiff");
1560 }
1561
1562 // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
1563 // TODO: The case where both inputs are constants does not need to be limited
1564 // to legal integers or equality comparison. See block below this.
1565 if (DL.isLegalInteger(Width: Len * 8) && isOnlyUsedInZeroEqualityComparison(CxtI: CI)) {
1566 IntegerType *IntType = IntegerType::get(C&: CI->getContext(), NumBits: Len * 8);
1567 Align PrefAlignment = DL.getPrefTypeAlign(Ty: IntType);
1568
1569 // First, see if we can fold either argument to a constant.
1570 Value *LHSV = nullptr;
1571 if (auto *LHSC = dyn_cast<Constant>(Val: LHS))
1572 LHSV = ConstantFoldLoadFromConstPtr(C: LHSC, Ty: IntType, DL);
1573
1574 Value *RHSV = nullptr;
1575 if (auto *RHSC = dyn_cast<Constant>(Val: RHS))
1576 RHSV = ConstantFoldLoadFromConstPtr(C: RHSC, Ty: IntType, DL);
1577
1578 // Don't generate unaligned loads. If either source is constant data,
1579 // alignment doesn't matter for that source because there is no load.
1580 if ((LHSV || getKnownAlignment(V: LHS, DL, CxtI: CI) >= PrefAlignment) &&
1581 (RHSV || getKnownAlignment(V: RHS, DL, CxtI: CI) >= PrefAlignment)) {
1582 if (!LHSV)
1583 LHSV = B.CreateLoad(Ty: IntType, Ptr: LHS, Name: "lhsv");
1584 if (!RHSV)
1585 RHSV = B.CreateLoad(Ty: IntType, Ptr: RHS, Name: "rhsv");
1586 return B.CreateZExt(V: B.CreateICmpNE(LHS: LHSV, RHS: RHSV), DestTy: CI->getType(), Name: "memcmp");
1587 }
1588 }
1589
1590 return nullptr;
1591}
1592
1593// Most simplifications for memcmp also apply to bcmp.
1594Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
1595 IRBuilderBase &B) {
1596 Value *LHS = CI->getArgOperand(i: 0), *RHS = CI->getArgOperand(i: 1);
1597 Value *Size = CI->getArgOperand(i: 2);
1598
1599 annotateNonNullAndDereferenceable(CI, ArgNos: {0, 1}, Size, DL);
1600
1601 if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, StrNCmp: false, B, DL))
1602 return Res;
1603
1604 // Handle constant Size.
1605 ConstantInt *LenC = dyn_cast<ConstantInt>(Val: Size);
1606 if (!LenC)
1607 return nullptr;
1608
1609 return optimizeMemCmpConstantSize(CI, LHS, RHS, Len: LenC->getZExtValue(), B, DL);
1610}
1611
1612Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilderBase &B) {
1613 Module *M = CI->getModule();
1614 if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
1615 return V;
1616
1617 // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
1618 // bcmp can be more efficient than memcmp because it only has to know that
1619 // there is a difference, not how different one is to the other.
1620 if (isLibFuncEmittable(M, TLI, TheLibFunc: LibFunc_bcmp) &&
1621 isOnlyUsedInZeroEqualityComparison(CxtI: CI)) {
1622 Value *LHS = CI->getArgOperand(i: 0);
1623 Value *RHS = CI->getArgOperand(i: 1);
1624 Value *Size = CI->getArgOperand(i: 2);
1625 return copyFlags(Old: *CI, New: emitBCmp(Ptr1: LHS, Ptr2: RHS, Len: Size, B, DL, TLI));
1626 }
1627
1628 return nullptr;
1629}
1630
1631Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilderBase &B) {
1632 return optimizeMemCmpBCmpCommon(CI, B);
1633}
1634
1635Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
1636 Value *Size = CI->getArgOperand(i: 2);
1637 annotateNonNullAndDereferenceable(CI, ArgNos: {0, 1}, Size, DL);
1638 if (isa<IntrinsicInst>(Val: CI))
1639 return nullptr;
1640
1641 // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
1642 CallInst *NewCI = B.CreateMemCpy(Dst: CI->getArgOperand(i: 0), DstAlign: Align(1),
1643 Src: CI->getArgOperand(i: 1), SrcAlign: Align(1), Size);
1644 mergeAttributesAndFlags(NewCI, Old: *CI);
1645 return CI->getArgOperand(i: 0);
1646}
1647
1648Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilderBase &B) {
1649 Value *Dst = CI->getArgOperand(i: 0);
1650 Value *Src = CI->getArgOperand(i: 1);
1651 ConstantInt *StopChar = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
1652 ConstantInt *N = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 3));
1653 StringRef SrcStr;
1654 if (CI->use_empty() && Dst == Src)
1655 return Dst;
1656 // memccpy(d, s, c, 0) -> nullptr
1657 if (N) {
1658 if (N->isNullValue())
1659 return Constant::getNullValue(Ty: CI->getType());
1660 if (!getConstantStringInfo(V: Src, Str&: SrcStr, /*TrimAtNul=*/false) ||
1661 // TODO: Handle zeroinitializer.
1662 !StopChar)
1663 return nullptr;
1664 } else {
1665 return nullptr;
1666 }
1667
1668 // Wrap arg 'c' of type int to char
1669 size_t Pos = SrcStr.find(C: StopChar->getSExtValue() & 0xFF);
1670 if (Pos == StringRef::npos) {
1671 if (N->getZExtValue() <= SrcStr.size()) {
1672 copyFlags(Old: *CI, New: B.CreateMemCpy(Dst, DstAlign: Align(1), Src, SrcAlign: Align(1),
1673 Size: CI->getArgOperand(i: 3)));
1674 return Constant::getNullValue(Ty: CI->getType());
1675 }
1676 return nullptr;
1677 }
1678
1679 Value *NewN =
1680 ConstantInt::get(Ty: N->getType(), V: std::min(a: uint64_t(Pos + 1), b: N->getZExtValue()));
1681 // memccpy -> llvm.memcpy
1682 copyFlags(Old: *CI, New: B.CreateMemCpy(Dst, DstAlign: Align(1), Src, SrcAlign: Align(1), Size: NewN));
1683 return Pos + 1 <= N->getZExtValue()
1684 ? B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: Dst, IdxList: NewN)
1685 : Constant::getNullValue(Ty: CI->getType());
1686}
1687
1688Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
1689 Value *Dst = CI->getArgOperand(i: 0);
1690 Value *N = CI->getArgOperand(i: 2);
1691 // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
1692 CallInst *NewCI =
1693 B.CreateMemCpy(Dst, DstAlign: Align(1), Src: CI->getArgOperand(i: 1), SrcAlign: Align(1), Size: N);
1694 // Propagate attributes, but memcpy has no return value, so make sure that
1695 // any return attributes are compliant.
1696 // TODO: Attach return value attributes to the 1st operand to preserve them?
1697 mergeAttributesAndFlags(NewCI, Old: *CI);
1698 return B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: Dst, IdxList: N);
1699}
1700
1701Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
1702 Value *Size = CI->getArgOperand(i: 2);
1703 annotateNonNullAndDereferenceable(CI, ArgNos: {0, 1}, Size, DL);
1704 if (isa<IntrinsicInst>(Val: CI))
1705 return nullptr;
1706
1707 // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
1708 CallInst *NewCI = B.CreateMemMove(Dst: CI->getArgOperand(i: 0), DstAlign: Align(1),
1709 Src: CI->getArgOperand(i: 1), SrcAlign: Align(1), Size);
1710 mergeAttributesAndFlags(NewCI, Old: *CI);
1711 return CI->getArgOperand(i: 0);
1712}
1713
1714Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
1715 Value *Size = CI->getArgOperand(i: 2);
1716 annotateNonNullAndDereferenceable(CI, ArgNos: 0, Size, DL);
1717 if (isa<IntrinsicInst>(Val: CI))
1718 return nullptr;
1719
1720 // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
1721 Value *Val = B.CreateIntCast(V: CI->getArgOperand(i: 1), DestTy: B.getInt8Ty(), isSigned: false);
1722 CallInst *NewCI = B.CreateMemSet(Ptr: CI->getArgOperand(i: 0), Val, Size, Align: Align(1));
1723 mergeAttributesAndFlags(NewCI, Old: *CI);
1724 return CI->getArgOperand(i: 0);
1725}
1726
1727Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
1728 if (isa<ConstantPointerNull>(Val: CI->getArgOperand(i: 0)))
1729 return copyFlags(Old: *CI, New: emitMalloc(Num: CI->getArgOperand(i: 1), B, DL, TLI));
1730
1731 return nullptr;
1732}
1733
1734// Optionally allow optimization of nobuiltin calls to operator new and its
1735// variants.
1736Value *LibCallSimplifier::maybeOptimizeNoBuiltinOperatorNew(CallInst *CI,
1737 IRBuilderBase &B) {
1738 if (!OptimizeHotColdNew)
1739 return nullptr;
1740 Function *Callee = CI->getCalledFunction();
1741 if (!Callee)
1742 return nullptr;
1743 LibFunc Func;
1744 if (!TLI->getLibFunc(FDecl: *Callee, F&: Func))
1745 return nullptr;
1746 switch (Func) {
1747 case LibFunc_Znwm:
1748 case LibFunc_ZnwmRKSt9nothrow_t:
1749 case LibFunc_ZnwmSt11align_val_t:
1750 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
1751 case LibFunc_Znam:
1752 case LibFunc_ZnamRKSt9nothrow_t:
1753 case LibFunc_ZnamSt11align_val_t:
1754 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
1755 case LibFunc_size_returning_new:
1756 case LibFunc_size_returning_new_aligned:
1757 // By default normal operator new calls (not already passing a hot_cold_t
1758 // parameter) are not mutated if the call is not marked builtin. Optionally
1759 // enable that in cases where it is known to be safe.
1760 if (!OptimizeNoBuiltinHotColdNew)
1761 return nullptr;
1762 break;
1763 case LibFunc_Znwm12__hot_cold_t:
1764 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
1765 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
1766 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1767 case LibFunc_Znam12__hot_cold_t:
1768 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
1769 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
1770 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1771 case LibFunc_size_returning_new_hot_cold:
1772 case LibFunc_size_returning_new_aligned_hot_cold:
1773 // If the nobuiltin call already passes a hot_cold_t parameter, allow update
1774 // of that parameter when enabled.
1775 if (!OptimizeExistingHotColdNew)
1776 return nullptr;
1777 break;
1778 default:
1779 return nullptr;
1780 }
1781 return optimizeNew(CI, B, Func);
1782}
1783
1784// When enabled, replace operator new() calls marked with a hot or cold memprof
1785// attribute with an operator new() call that takes a __hot_cold_t parameter.
1786// Currently this is supported by the open source version of tcmalloc, see:
1787// https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h
1788Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
1789 LibFunc &Func) {
1790 if (!OptimizeHotColdNew)
1791 return nullptr;
1792
1793 uint8_t HotCold;
1794 if (CI->getAttributes().getFnAttr(Kind: "memprof").getValueAsString() == "cold")
1795 HotCold = ColdNewHintValue;
1796 else if (CI->getAttributes().getFnAttr(Kind: "memprof").getValueAsString() ==
1797 "notcold")
1798 HotCold = NotColdNewHintValue;
1799 else if (CI->getAttributes().getFnAttr(Kind: "memprof").getValueAsString() == "hot")
1800 HotCold = HotNewHintValue;
1801 else if (CI->getAttributes().getFnAttr(Kind: "memprof").getValueAsString() ==
1802 "ambiguous")
1803 HotCold = AmbiguousNewHintValue;
1804 else
1805 return nullptr;
1806
1807 // For calls that already pass a hot/cold hint, only update the hint if
1808 // directed by OptimizeExistingHotColdNew. For other calls to new, add a hint
1809 // if cold or hot, and leave as-is for default handling if "notcold" aka warm.
1810 // Note that in cases where we decide it is "notcold", it might be slightly
1811 // better to replace the hinted call with a non hinted call, to avoid the
1812 // extra parameter and the if condition check of the hint value in the
1813 // allocator. This can be considered in the future.
1814 Value *NewCall = nullptr;
1815 switch (Func) {
1816 case LibFunc_Znwm12__hot_cold_t:
1817 if (OptimizeExistingHotColdNew)
1818 NewCall = emitHotColdNew(Num: CI->getArgOperand(i: 0), B, TLI,
1819 NewFunc: LibFunc_Znwm12__hot_cold_t, HotCold);
1820 break;
1821 case LibFunc_Znwm:
1822 NewCall = emitHotColdNew(Num: CI->getArgOperand(i: 0), B, TLI,
1823 NewFunc: LibFunc_Znwm12__hot_cold_t, HotCold);
1824 break;
1825 case LibFunc_Znam12__hot_cold_t:
1826 if (OptimizeExistingHotColdNew)
1827 NewCall = emitHotColdNew(Num: CI->getArgOperand(i: 0), B, TLI,
1828 NewFunc: LibFunc_Znam12__hot_cold_t, HotCold);
1829 break;
1830 case LibFunc_Znam:
1831 NewCall = emitHotColdNew(Num: CI->getArgOperand(i: 0), B, TLI,
1832 NewFunc: LibFunc_Znam12__hot_cold_t, HotCold);
1833 break;
1834 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
1835 if (OptimizeExistingHotColdNew)
1836 NewCall = emitHotColdNewNoThrow(
1837 Num: CI->getArgOperand(i: 0), NoThrow: CI->getArgOperand(i: 1), B, TLI,
1838 NewFunc: LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
1839 break;
1840 case LibFunc_ZnwmRKSt9nothrow_t:
1841 NewCall = emitHotColdNewNoThrow(
1842 Num: CI->getArgOperand(i: 0), NoThrow: CI->getArgOperand(i: 1), B, TLI,
1843 NewFunc: LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
1844 break;
1845 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
1846 if (OptimizeExistingHotColdNew)
1847 NewCall = emitHotColdNewNoThrow(
1848 Num: CI->getArgOperand(i: 0), NoThrow: CI->getArgOperand(i: 1), B, TLI,
1849 NewFunc: LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
1850 break;
1851 case LibFunc_ZnamRKSt9nothrow_t:
1852 NewCall = emitHotColdNewNoThrow(
1853 Num: CI->getArgOperand(i: 0), NoThrow: CI->getArgOperand(i: 1), B, TLI,
1854 NewFunc: LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
1855 break;
1856 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
1857 if (OptimizeExistingHotColdNew)
1858 NewCall = emitHotColdNewAligned(
1859 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), B, TLI,
1860 NewFunc: LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
1861 break;
1862 case LibFunc_ZnwmSt11align_val_t:
1863 NewCall = emitHotColdNewAligned(
1864 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), B, TLI,
1865 NewFunc: LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
1866 break;
1867 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
1868 if (OptimizeExistingHotColdNew)
1869 NewCall = emitHotColdNewAligned(
1870 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), B, TLI,
1871 NewFunc: LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
1872 break;
1873 case LibFunc_ZnamSt11align_val_t:
1874 NewCall = emitHotColdNewAligned(
1875 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), B, TLI,
1876 NewFunc: LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
1877 break;
1878 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1879 if (OptimizeExistingHotColdNew)
1880 NewCall = emitHotColdNewAlignedNoThrow(
1881 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), NoThrow: CI->getArgOperand(i: 2), B,
1882 TLI, NewFunc: LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1883 HotCold);
1884 break;
1885 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
1886 NewCall = emitHotColdNewAlignedNoThrow(
1887 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), NoThrow: CI->getArgOperand(i: 2), B,
1888 TLI, NewFunc: LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
1889 break;
1890 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
1891 if (OptimizeExistingHotColdNew)
1892 NewCall = emitHotColdNewAlignedNoThrow(
1893 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), NoThrow: CI->getArgOperand(i: 2), B,
1894 TLI, NewFunc: LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
1895 HotCold);
1896 break;
1897 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
1898 NewCall = emitHotColdNewAlignedNoThrow(
1899 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), NoThrow: CI->getArgOperand(i: 2), B,
1900 TLI, NewFunc: LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
1901 break;
1902 case LibFunc_size_returning_new:
1903 NewCall = emitHotColdSizeReturningNew(Num: CI->getArgOperand(i: 0), B, TLI,
1904 NewFunc: LibFunc_size_returning_new_hot_cold,
1905 HotCold);
1906 break;
1907 case LibFunc_size_returning_new_hot_cold:
1908 if (OptimizeExistingHotColdNew)
1909 NewCall = emitHotColdSizeReturningNew(Num: CI->getArgOperand(i: 0), B, TLI,
1910 NewFunc: LibFunc_size_returning_new_hot_cold,
1911 HotCold);
1912 break;
1913 case LibFunc_size_returning_new_aligned:
1914 NewCall = emitHotColdSizeReturningNewAligned(
1915 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), B, TLI,
1916 NewFunc: LibFunc_size_returning_new_aligned_hot_cold, HotCold);
1917 break;
1918 case LibFunc_size_returning_new_aligned_hot_cold:
1919 if (OptimizeExistingHotColdNew)
1920 NewCall = emitHotColdSizeReturningNewAligned(
1921 Num: CI->getArgOperand(i: 0), Align: CI->getArgOperand(i: 1), B, TLI,
1922 NewFunc: LibFunc_size_returning_new_aligned_hot_cold, HotCold);
1923 break;
1924 default:
1925 return nullptr;
1926 }
1927
1928 if (auto *NewCI = dyn_cast_or_null<Instruction>(Val: NewCall))
1929 NewCI->copyMetadata(SrcInst: *CI);
1930
1931 return NewCall;
1932}
1933
1934//===----------------------------------------------------------------------===//
1935// Math Library Optimizations
1936//===----------------------------------------------------------------------===//
1937
1938// Replace a libcall \p CI with a call to intrinsic \p IID
1939static Value *replaceUnaryCall(CallInst *CI, IRBuilderBase &B,
1940 Intrinsic::ID IID) {
1941 Value *NewCall = B.CreateUnaryIntrinsic(ID: IID, Op: CI->getArgOperand(i: 0), FMFSource: CI);
1942 NewCall->takeName(V: CI);
1943 return copyFlags(Old: *CI, New: NewCall);
1944}
1945
1946static Value *replaceBinaryCall(CallInst *CI, IRBuilderBase &B,
1947 Intrinsic::ID IID) {
1948 Value *NewCall = B.CreateBinaryIntrinsic(ID: IID, LHS: CI->getArgOperand(i: 0),
1949 RHS: CI->getArgOperand(i: 1), FMFSource: CI);
1950 NewCall->takeName(V: CI);
1951 return copyFlags(Old: *CI, New: NewCall);
1952}
1953
1954/// Return a variant of Val with float type.
1955/// Currently this works in two cases: If Val is an FPExtension of a float
1956/// value to something bigger, simply return the operand.
1957/// If Val is a ConstantFP but can be converted to a float ConstantFP without
1958/// loss of precision do so.
1959static Value *valueHasFloatPrecision(Value *Val) {
1960 if (FPExtInst *Cast = dyn_cast<FPExtInst>(Val)) {
1961 Value *Op = Cast->getOperand(i_nocapture: 0);
1962 if (Op->getType()->isFloatTy())
1963 return Op;
1964 }
1965 if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
1966 APFloat F = Const->getValueAPF();
1967 bool losesInfo;
1968 (void)F.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
1969 losesInfo: &losesInfo);
1970 if (!losesInfo)
1971 return ConstantFP::get(Context&: Const->getContext(), V: F);
1972 }
1973 return nullptr;
1974}
1975
1976/// Shrink double -> float functions.
1977static Value *optimizeDoubleFP(CallInst *CI, IRBuilderBase &B,
1978 bool isBinary, const TargetLibraryInfo *TLI,
1979 bool isPrecise = false) {
1980 Function *CalleeFn = CI->getCalledFunction();
1981 if (!CI->getType()->isDoubleTy() || !CalleeFn)
1982 return nullptr;
1983
1984 // If not all the uses of the function are converted to float, then bail out.
1985 // This matters if the precision of the result is more important than the
1986 // precision of the arguments.
1987 if (isPrecise)
1988 for (User *U : CI->users()) {
1989 FPTruncInst *Cast = dyn_cast<FPTruncInst>(Val: U);
1990 if (!Cast || !Cast->getType()->isFloatTy())
1991 return nullptr;
1992 }
1993
1994 // If this is something like 'g((double) float)', convert to 'gf(float)'.
1995 Value *V[2];
1996 V[0] = valueHasFloatPrecision(Val: CI->getArgOperand(i: 0));
1997 V[1] = isBinary ? valueHasFloatPrecision(Val: CI->getArgOperand(i: 1)) : nullptr;
1998 if (!V[0] || (isBinary && !V[1]))
1999 return nullptr;
2000
2001 // If call isn't an intrinsic, check that it isn't within a function with the
2002 // same name as the float version of this call, otherwise the result is an
2003 // infinite loop. For example, from MinGW-w64:
2004 //
2005 // float expf(float val) { return (float) exp((double) val); }
2006 StringRef CalleeName = CalleeFn->getName();
2007 bool IsIntrinsic = CalleeFn->isIntrinsic();
2008 if (!IsIntrinsic) {
2009 StringRef CallerName = CI->getFunction()->getName();
2010 if (CallerName.ends_with(Suffix: 'f') &&
2011 CallerName.size() == (CalleeName.size() + 1) &&
2012 CallerName.starts_with(Prefix: CalleeName))
2013 return nullptr;
2014 }
2015
2016 // Propagate the math semantics from the current function to the new function.
2017 IRBuilderBase::FastMathFlagGuard Guard(B);
2018 B.setFastMathFlags(CI->getFastMathFlags());
2019
2020 // g((double) float) -> (double) gf(float)
2021 Value *R;
2022 if (IsIntrinsic) {
2023 Intrinsic::ID IID = CalleeFn->getIntrinsicID();
2024 R = isBinary ? B.CreateIntrinsic(ID: IID, OverloadTypes: B.getFloatTy(), Args: V)
2025 : B.CreateIntrinsic(ID: IID, OverloadTypes: B.getFloatTy(), Args: V[0]);
2026 } else {
2027 AttributeList CallsiteAttrs = CI->getAttributes();
2028 R = isBinary
2029 ? emitBinaryFloatFnCall(Op1: V[0], Op2: V[1], TLI, Name: CalleeName, B,
2030 Attrs: CallsiteAttrs)
2031 : emitUnaryFloatFnCall(Op: V[0], TLI, Name: CalleeName, B, Attrs: CallsiteAttrs);
2032 }
2033 return B.CreateFPExt(V: R, DestTy: B.getDoubleTy());
2034}
2035
2036/// Shrink double -> float for unary functions.
2037static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilderBase &B,
2038 const TargetLibraryInfo *TLI,
2039 bool isPrecise = false) {
2040 return optimizeDoubleFP(CI, B, isBinary: false, TLI, isPrecise);
2041}
2042
2043/// Shrink double -> float for binary functions.
2044static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilderBase &B,
2045 const TargetLibraryInfo *TLI,
2046 bool isPrecise = false) {
2047 return optimizeDoubleFP(CI, B, isBinary: true, TLI, isPrecise);
2048}
2049
2050// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z)))
2051Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) {
2052 Value *Real, *Imag;
2053
2054 if (CI->arg_size() == 1) {
2055
2056 if (!CI->isFast())
2057 return nullptr;
2058
2059 Value *Op = CI->getArgOperand(i: 0);
2060 assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
2061
2062 Real = B.CreateExtractValue(Agg: Op, Idxs: 0, Name: "real");
2063 Imag = B.CreateExtractValue(Agg: Op, Idxs: 1, Name: "imag");
2064
2065 } else {
2066 assert(CI->arg_size() == 2 && "Unexpected signature for cabs!");
2067
2068 Real = CI->getArgOperand(i: 0);
2069 Imag = CI->getArgOperand(i: 1);
2070
2071 // if real or imaginary part is zero, simplify to abs(cimag(z))
2072 // or abs(creal(z))
2073 Value *AbsOp = nullptr;
2074 if (ConstantFP *ConstReal = dyn_cast<ConstantFP>(Val: Real)) {
2075 if (ConstReal->isZero())
2076 AbsOp = Imag;
2077
2078 } else if (ConstantFP *ConstImag = dyn_cast<ConstantFP>(Val: Imag)) {
2079 if (ConstImag->isZero())
2080 AbsOp = Real;
2081 }
2082
2083 if (AbsOp)
2084 return copyFlags(Old: *CI, New: B.CreateFAbs(V: AbsOp, FMFSource: CI, Name: "cabs"));
2085
2086 if (!CI->isFast())
2087 return nullptr;
2088 }
2089
2090 // Propagate fast-math flags from the existing call to new instructions.
2091 Value *RealReal = B.CreateFMulFMF(L: Real, R: Real, FMFSource: CI);
2092 Value *ImagImag = B.CreateFMulFMF(L: Imag, R: Imag, FMFSource: CI);
2093 return copyFlags(
2094 Old: *CI, New: B.CreateUnaryIntrinsic(ID: Intrinsic::sqrt,
2095 Op: B.CreateFAddFMF(L: RealReal, R: ImagImag, FMFSource: CI), FMFSource: CI,
2096 Name: "cabs"));
2097}
2098
2099// Return a properly extended integer (DstWidth bits wide) if the operation is
2100// an itofp.
2101static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
2102 if (isa<SIToFPInst>(Val: I2F) || isa<UIToFPInst>(Val: I2F)) {
2103 Value *Op = cast<Instruction>(Val: I2F)->getOperand(i: 0);
2104 // Make sure that the exponent fits inside an "int" of size DstWidth,
2105 // thus avoiding any range issues that FP has not.
2106 unsigned BitWidth = Op->getType()->getScalarSizeInBits();
2107 if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(Val: I2F))) {
2108 Type *IntTy = Op->getType()->getWithNewBitWidth(NewBitWidth: DstWidth);
2109 return isa<SIToFPInst>(Val: I2F) ? B.CreateSExt(V: Op, DestTy: IntTy)
2110 : B.CreateZExt(V: Op, DestTy: IntTy);
2111 }
2112 }
2113
2114 return nullptr;
2115}
2116
2117/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
2118/// ldexp(1.0, x) for pow(2.0, itofp(x)); exp2(n * x) for pow(2.0 ** n, x);
2119/// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x).
2120Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
2121 Module *M = Pow->getModule();
2122 Value *Base = Pow->getArgOperand(i: 0), *Expo = Pow->getArgOperand(i: 1);
2123 Type *Ty = Pow->getType();
2124 bool Ignored;
2125
2126 // Evaluate special cases related to a nested function as the base.
2127
2128 // pow(exp(x), y) -> exp(x * y)
2129 // pow(exp2(x), y) -> exp2(x * y)
2130 // If exp{,2}() is used only once, it is better to fold two transcendental
2131 // math functions into one. If used again, exp{,2}() would still have to be
2132 // called with the original argument, then keep both original transcendental
2133 // functions. However, this transformation is only safe with fully relaxed
2134 // math semantics, since, besides rounding differences, it changes overflow
2135 // and underflow behavior quite dramatically. For example:
2136 // pow(exp(1000), 0.001) = pow(inf, 0.001) = inf
2137 // Whereas:
2138 // exp(1000 * 0.001) = exp(1)
2139 // TODO: Loosen the requirement for fully relaxed math semantics.
2140 // TODO: Handle exp10() when more targets have it available.
2141 CallInst *BaseFn = dyn_cast<CallInst>(Val: Base);
2142 if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
2143 LibFunc LibFn;
2144
2145 Function *CalleeFn = BaseFn->getCalledFunction();
2146 if (CalleeFn && TLI->getLibFunc(funcName: CalleeFn->getName(), F&: LibFn) &&
2147 isLibFuncEmittable(M, TLI, TheLibFunc: LibFn)) {
2148 StringRef ExpName;
2149 Intrinsic::ID ID;
2150 Value *ExpFn;
2151 LibFunc LibFnFloat, LibFnDouble, LibFnLongDouble;
2152
2153 switch (LibFn) {
2154 default:
2155 return nullptr;
2156 case LibFunc_expf:
2157 case LibFunc_exp:
2158 case LibFunc_expl:
2159 ExpName = TLI->getName(F: LibFunc_exp);
2160 ID = Intrinsic::exp;
2161 LibFnFloat = LibFunc_expf;
2162 LibFnDouble = LibFunc_exp;
2163 LibFnLongDouble = LibFunc_expl;
2164 break;
2165 case LibFunc_exp2f:
2166 case LibFunc_exp2:
2167 case LibFunc_exp2l:
2168 ExpName = TLI->getName(F: LibFunc_exp2);
2169 ID = Intrinsic::exp2;
2170 LibFnFloat = LibFunc_exp2f;
2171 LibFnDouble = LibFunc_exp2;
2172 LibFnLongDouble = LibFunc_exp2l;
2173 break;
2174 }
2175
2176 // Create new exp{,2}() with the product as its argument.
2177 Value *FMul = B.CreateFMul(L: BaseFn->getArgOperand(i: 0), R: Expo, Name: "mul");
2178 ExpFn = BaseFn->doesNotAccessMemory()
2179 ? B.CreateUnaryIntrinsic(ID, Op: FMul, FMFSource: nullptr, Name: ExpName)
2180 : emitUnaryFloatFnCall(Op: FMul, TLI, DoubleFn: LibFnDouble, FloatFn: LibFnFloat,
2181 LongDoubleFn: LibFnLongDouble, B,
2182 Attrs: BaseFn->getAttributes());
2183
2184 // Since the new exp{,2}() is different from the original one, dead code
2185 // elimination cannot be trusted to remove it, since it may have side
2186 // effects (e.g., errno). When the only consumer for the original
2187 // exp{,2}() is pow(), then it has to be explicitly erased.
2188 substituteInParent(I: BaseFn, With: ExpFn);
2189 return ExpFn;
2190 }
2191 }
2192
2193 // Evaluate special cases related to a constant base.
2194
2195 const APFloat *BaseF;
2196 if (!match(V: Base, P: m_APFloat(Res&: BaseF)))
2197 return nullptr;
2198
2199 AttributeList NoAttrs; // Attributes are only meaningful on the original call
2200
2201 const bool UseIntrinsic = Pow->doesNotAccessMemory();
2202
2203 // pow(2.0, itofp(x)) -> ldexp(1.0, x)
2204 if ((UseIntrinsic || !Ty->isVectorTy()) && BaseF->isExactlyValue(V: 2.0) &&
2205 (isa<SIToFPInst>(Val: Expo) || isa<UIToFPInst>(Val: Expo)) &&
2206 (UseIntrinsic ||
2207 hasFloatFn(M, TLI, Ty, DoubleFn: LibFunc_ldexp, FloatFn: LibFunc_ldexpf, LongDoubleFn: LibFunc_ldexpl))) {
2208
2209 // TODO: Shouldn't really need to depend on getIntToFPVal for intrinsic. Can
2210 // just directly use the original integer type.
2211 if (Value *ExpoI = getIntToFPVal(I2F: Expo, B, DstWidth: TLI->getIntSize())) {
2212 Constant *One = ConstantFP::get(Ty, V: 1.0);
2213
2214 if (UseIntrinsic) {
2215 return copyFlags(Old: *Pow, New: B.CreateIntrinsic(ID: Intrinsic::ldexp,
2216 OverloadTypes: {Ty, ExpoI->getType()},
2217 Args: {One, ExpoI}, FMFSource: Pow, Name: "exp2"));
2218 }
2219
2220 return copyFlags(Old: *Pow, New: emitBinaryFloatFnCall(
2221 Op1: One, Op2: ExpoI, TLI, DoubleFn: LibFunc_ldexp, FloatFn: LibFunc_ldexpf,
2222 LongDoubleFn: LibFunc_ldexpl, B, Attrs: NoAttrs));
2223 }
2224 }
2225
2226 // pow(2.0 ** n, x) -> exp2(n * x)
2227 if (hasFloatFn(M, TLI, Ty, DoubleFn: LibFunc_exp2, FloatFn: LibFunc_exp2f, LongDoubleFn: LibFunc_exp2l)) {
2228 APFloat BaseR = APFloat(1.0);
2229 BaseR.convert(ToSemantics: BaseF->getSemantics(), RM: APFloat::rmTowardZero, losesInfo: &Ignored);
2230 BaseR = BaseR / *BaseF;
2231 bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
2232 const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
2233 APSInt NI(64, false);
2234 if ((IsInteger || IsReciprocal) &&
2235 NF->convertToInteger(Result&: NI, RM: APFloat::rmTowardZero, IsExact: &Ignored) ==
2236 APFloat::opOK &&
2237 NI > 1 && NI.isPowerOf2()) {
2238 double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
2239 Value *FMul = B.CreateFMul(L: Expo, R: ConstantFP::get(Ty, V: N), Name: "mul");
2240 if (Pow->doesNotAccessMemory())
2241 return copyFlags(Old: *Pow, New: B.CreateUnaryIntrinsic(ID: Intrinsic::exp2, Op: FMul,
2242 FMFSource: nullptr, Name: "exp2"));
2243 else
2244 return copyFlags(Old: *Pow, New: emitUnaryFloatFnCall(Op: FMul, TLI, DoubleFn: LibFunc_exp2,
2245 FloatFn: LibFunc_exp2f,
2246 LongDoubleFn: LibFunc_exp2l, B, Attrs: NoAttrs));
2247 }
2248 }
2249
2250 // pow(10.0, x) -> exp10(x)
2251 if (BaseF->isExactlyValue(V: 10.0) &&
2252 hasFloatFn(M, TLI, Ty, DoubleFn: LibFunc_exp10, FloatFn: LibFunc_exp10f, LongDoubleFn: LibFunc_exp10l)) {
2253
2254 if (Pow->doesNotAccessMemory()) {
2255 return B.CreateIntrinsic(ID: Intrinsic::exp10, OverloadTypes: {Ty}, Args: {Expo}, FMFSource: Pow, Name: "exp10", OpBundles: {},
2256 SetFn: [Pow](CallInst *CI) { CI->copyIRFlags(V: Pow); });
2257 }
2258
2259 return copyFlags(Old: *Pow, New: emitUnaryFloatFnCall(Op: Expo, TLI, DoubleFn: LibFunc_exp10,
2260 FloatFn: LibFunc_exp10f, LongDoubleFn: LibFunc_exp10l,
2261 B, Attrs: NoAttrs));
2262 }
2263
2264 // pow(x, y) -> exp2(log2(x) * y)
2265 if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() &&
2266 !BaseF->isNegative()) {
2267 // pow(1, inf) is defined to be 1 but exp2(log2(1) * inf) evaluates to NaN.
2268 // Luckily optimizePow has already handled the x == 1 case.
2269 assert(!match(Base, m_FPOne()) &&
2270 "pow(1.0, y) should have been simplified earlier!");
2271
2272 Value *Log = nullptr;
2273 if (Ty->isFloatTy())
2274 Log = ConstantFP::get(Ty, V: std::log2(x: BaseF->convertToFloat()));
2275 else if (Ty->isDoubleTy())
2276 Log = ConstantFP::get(Ty, V: std::log2(x: BaseF->convertToDouble()));
2277
2278 if (Log) {
2279 Value *FMul = B.CreateFMul(L: Log, R: Expo, Name: "mul");
2280 if (Pow->doesNotAccessMemory())
2281 return copyFlags(Old: *Pow, New: B.CreateUnaryIntrinsic(ID: Intrinsic::exp2, Op: FMul,
2282 FMFSource: nullptr, Name: "exp2"));
2283 else if (hasFloatFn(M, TLI, Ty, DoubleFn: LibFunc_exp2, FloatFn: LibFunc_exp2f,
2284 LongDoubleFn: LibFunc_exp2l))
2285 return copyFlags(Old: *Pow, New: emitUnaryFloatFnCall(Op: FMul, TLI, DoubleFn: LibFunc_exp2,
2286 FloatFn: LibFunc_exp2f,
2287 LongDoubleFn: LibFunc_exp2l, B, Attrs: NoAttrs));
2288 }
2289 }
2290
2291 return nullptr;
2292}
2293
2294static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno,
2295 Module *M, IRBuilderBase &B,
2296 const TargetLibraryInfo *TLI) {
2297 // If errno is never set, then use the intrinsic for sqrt().
2298 if (NoErrno)
2299 return B.CreateUnaryIntrinsic(ID: Intrinsic::sqrt, Op: V, FMFSource: nullptr, Name: "sqrt");
2300
2301 // Otherwise, use the libcall for sqrt().
2302 if (hasFloatFn(M, TLI, Ty: V->getType(), DoubleFn: LibFunc_sqrt, FloatFn: LibFunc_sqrtf,
2303 LongDoubleFn: LibFunc_sqrtl))
2304 // TODO: We also should check that the target can in fact lower the sqrt()
2305 // libcall. We currently have no way to ask this question, so we ask if
2306 // the target has a sqrt() libcall, which is not exactly the same.
2307 return emitUnaryFloatFnCall(Op: V, TLI, DoubleFn: LibFunc_sqrt, FloatFn: LibFunc_sqrtf,
2308 LongDoubleFn: LibFunc_sqrtl, B, Attrs);
2309
2310 return nullptr;
2311}
2312
2313/// Use square root in place of pow(x, +/-0.5).
2314Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
2315 Value *Sqrt, *Base = Pow->getArgOperand(i: 0), *Expo = Pow->getArgOperand(i: 1);
2316 Module *Mod = Pow->getModule();
2317 Type *Ty = Pow->getType();
2318
2319 const APFloat *ExpoF;
2320 if (!match(V: Expo, P: m_APFloat(Res&: ExpoF)) ||
2321 (!ExpoF->isExactlyValue(V: 0.5) && !ExpoF->isExactlyValue(V: -0.5)))
2322 return nullptr;
2323
2324 // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step,
2325 // so that requires fast-math-flags (afn or reassoc).
2326 if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
2327 return nullptr;
2328
2329 // If we have a pow() library call (accesses memory) and we can't guarantee
2330 // that the base is not an infinity, give up:
2331 // pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting
2332 // errno), but sqrt(-Inf) is required by various standards to set errno.
2333 if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&
2334 !isKnownNeverInfinity(
2335 V: Base, SQ: SimplifyQuery(DL, TLI, DT, AC, Pow, true, true, DC)))
2336 return nullptr;
2337
2338 Sqrt = getSqrtCall(V: Base, Attrs: AttributeList(), NoErrno: Pow->doesNotAccessMemory(), M: Mod, B,
2339 TLI);
2340 if (!Sqrt)
2341 return nullptr;
2342
2343 // Handle signed zero base by expanding to fabs(sqrt(x)).
2344 if (!Pow->hasNoSignedZeros())
2345 Sqrt = B.CreateFAbs(V: Sqrt, FMFSource: nullptr, Name: "abs");
2346
2347 Sqrt = copyFlags(Old: *Pow, New: Sqrt);
2348
2349 // Handle non finite base by expanding to
2350 // (x == -infinity ? +infinity : sqrt(x)).
2351 if (!Pow->hasNoInfs()) {
2352 Value *PosInf = ConstantFP::getInfinity(Ty),
2353 *NegInf = ConstantFP::getInfinity(Ty, Negative: true);
2354 Value *FCmp = B.CreateFCmpOEQ(LHS: Base, RHS: NegInf, Name: "isinf");
2355 Sqrt = B.CreateSelect(C: FCmp, True: PosInf, False: Sqrt);
2356 }
2357
2358 // If the exponent is negative, then get the reciprocal.
2359 if (ExpoF->isNegative())
2360 Sqrt = B.CreateFDiv(L: ConstantFP::get(Ty, V: 1.0), R: Sqrt, Name: "reciprocal");
2361
2362 return Sqrt;
2363}
2364
2365static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
2366 IRBuilderBase &B) {
2367 Value *Args[] = {Base, Expo};
2368 Type *Types[] = {Base->getType(), Expo->getType()};
2369 return B.CreateIntrinsic(ID: Intrinsic::powi, OverloadTypes: Types, Args);
2370}
2371
2372Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
2373 Value *Base = Pow->getArgOperand(i: 0);
2374 Value *Expo = Pow->getArgOperand(i: 1);
2375 Function *Callee = Pow->getCalledFunction();
2376 StringRef Name = Callee->getName();
2377 Type *Ty = Pow->getType();
2378 Module *M = Pow->getModule();
2379 bool AllowApprox = Pow->hasApproxFunc();
2380 bool Ignored;
2381
2382 // Propagate the math semantics from the call to any created instructions.
2383 IRBuilderBase::FastMathFlagGuard Guard(B);
2384 B.setFastMathFlags(Pow->getFastMathFlags());
2385 // Evaluate special cases related to the base.
2386
2387 // pow(1.0, x) -> 1.0
2388 if (match(V: Base, P: m_FPOne()))
2389 return Base;
2390
2391 if (Value *Exp = replacePowWithExp(Pow, B))
2392 return Exp;
2393
2394 // Evaluate special cases related to the exponent.
2395
2396 // pow(x, -1.0) -> 1.0 / x
2397 if (match(V: Expo, P: m_SpecificFP(V: -1.0)))
2398 return B.CreateFDiv(L: ConstantFP::get(Ty, V: 1.0), R: Base, Name: "reciprocal");
2399
2400 // pow(x, +/-0.0) -> 1.0
2401 if (match(V: Expo, P: m_AnyZeroFP()))
2402 return ConstantFP::get(Ty, V: 1.0);
2403
2404 // pow(x, 1.0) -> x
2405 if (match(V: Expo, P: m_FPOne()))
2406 return Base;
2407
2408 // pow(x, 2.0) -> x * x
2409 if (match(V: Expo, P: m_SpecificFP(V: 2.0)) && Pow->doesNotAccessMemory())
2410 return B.CreateFMul(L: Base, R: Base, Name: "square");
2411
2412 if (Value *Sqrt = replacePowWithSqrt(Pow, B))
2413 return Sqrt;
2414
2415 // If we can approximate pow:
2416 // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
2417 // pow(x, n) -> powi(x, n) if n is a constant signed integer value
2418 const APFloat *ExpoF;
2419 if (AllowApprox && match(V: Expo, P: m_APFloat(Res&: ExpoF)) &&
2420 !ExpoF->isExactlyValue(V: 0.5) && !ExpoF->isExactlyValue(V: -0.5)) {
2421 APFloat ExpoA(abs(X: *ExpoF));
2422 APFloat ExpoI(*ExpoF);
2423 Value *Sqrt = nullptr;
2424 if (!ExpoA.isInteger()) {
2425 APFloat Expo2 = ExpoA;
2426 // To check if ExpoA is an integer + 0.5, we add it to itself. If there
2427 // is no floating point exception and the result is an integer, then
2428 // ExpoA == integer + 0.5
2429 if (Expo2.add(RHS: ExpoA, RM: APFloat::rmNearestTiesToEven) != APFloat::opOK)
2430 return nullptr;
2431
2432 if (!Expo2.isInteger())
2433 return nullptr;
2434
2435 if (ExpoI.roundToIntegral(RM: APFloat::rmTowardNegative) !=
2436 APFloat::opInexact)
2437 return nullptr;
2438 if (!ExpoI.isInteger())
2439 return nullptr;
2440 ExpoF = &ExpoI;
2441
2442 Sqrt = getSqrtCall(V: Base, Attrs: AttributeList(), NoErrno: Pow->doesNotAccessMemory(), M,
2443 B, TLI);
2444 if (!Sqrt)
2445 return nullptr;
2446 }
2447
2448 // 0.5 fraction is now optionally handled.
2449 // Do pow -> powi for remaining integer exponent
2450 APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
2451 if (ExpoF->isInteger() &&
2452 ExpoF->convertToInteger(Result&: IntExpo, RM: APFloat::rmTowardZero, IsExact: &Ignored) ==
2453 APFloat::opOK) {
2454 Value *PowI = copyFlags(
2455 Old: *Pow,
2456 New: createPowWithIntegerExponent(
2457 Base, Expo: ConstantInt::get(Ty: B.getIntNTy(N: TLI->getIntSize()), V: IntExpo),
2458 M, B));
2459
2460 if (PowI && Sqrt)
2461 return B.CreateFMul(L: PowI, R: Sqrt);
2462
2463 return PowI;
2464 }
2465 }
2466
2467 // powf(x, itofp(y)) -> powi(x, y)
2468 if (AllowApprox && (isa<SIToFPInst>(Val: Expo) || isa<UIToFPInst>(Val: Expo))) {
2469 if (Value *ExpoI = getIntToFPVal(I2F: Expo, B, DstWidth: TLI->getIntSize()))
2470 return copyFlags(Old: *Pow, New: createPowWithIntegerExponent(Base, Expo: ExpoI, M, B));
2471 }
2472
2473 // Shrink pow() to powf() if the arguments are single precision,
2474 // unless the result is expected to be double precision.
2475 if (UnsafeFPShrink && Name == TLI->getName(F: LibFunc_pow) &&
2476 hasFloatVersion(M, FuncName: Name)) {
2477 if (Value *Shrunk = optimizeBinaryDoubleFP(CI: Pow, B, TLI, isPrecise: true))
2478 return Shrunk;
2479 }
2480
2481 return nullptr;
2482}
2483
2484Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
2485 Module *M = CI->getModule();
2486 Function *Callee = CI->getCalledFunction();
2487 StringRef Name = Callee->getName();
2488 Value *Ret = nullptr;
2489 if (UnsafeFPShrink && Name == TLI->getName(F: LibFunc_exp2) &&
2490 hasFloatVersion(M, FuncName: Name))
2491 Ret = optimizeUnaryDoubleFP(CI, B, TLI, isPrecise: true);
2492
2493 // If we have an llvm.exp2 intrinsic, emit the llvm.ldexp intrinsic. If we
2494 // have the libcall, emit the libcall.
2495 //
2496 // TODO: In principle we should be able to just always use the intrinsic for
2497 // any doesNotAccessMemory callsite.
2498
2499 const bool UseIntrinsic = Callee->isIntrinsic();
2500 // Bail out for vectors because the code below only expects scalars.
2501 Type *Ty = CI->getType();
2502 if (!UseIntrinsic && Ty->isVectorTy())
2503 return Ret;
2504
2505 // exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize
2506 // exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize
2507 Value *Op = CI->getArgOperand(i: 0);
2508 if ((isa<SIToFPInst>(Val: Op) || isa<UIToFPInst>(Val: Op)) &&
2509 (UseIntrinsic ||
2510 hasFloatFn(M, TLI, Ty, DoubleFn: LibFunc_ldexp, FloatFn: LibFunc_ldexpf, LongDoubleFn: LibFunc_ldexpl))) {
2511 if (Value *Exp = getIntToFPVal(I2F: Op, B, DstWidth: TLI->getIntSize())) {
2512 Constant *One = ConstantFP::get(Ty, V: 1.0);
2513
2514 if (UseIntrinsic) {
2515 return copyFlags(Old: *CI, New: B.CreateIntrinsic(ID: Intrinsic::ldexp,
2516 OverloadTypes: {Ty, Exp->getType()},
2517 Args: {One, Exp}, FMFSource: CI));
2518 }
2519
2520 IRBuilderBase::FastMathFlagGuard Guard(B);
2521 B.setFastMathFlags(CI->getFastMathFlags());
2522 return copyFlags(Old: *CI, New: emitBinaryFloatFnCall(
2523 Op1: One, Op2: Exp, TLI, DoubleFn: LibFunc_ldexp, FloatFn: LibFunc_ldexpf,
2524 LongDoubleFn: LibFunc_ldexpl, B, Attrs: AttributeList()));
2525 }
2526 }
2527
2528 return Ret;
2529}
2530
2531Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B,
2532 Intrinsic::ID IID) {
2533 // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
2534 // the intrinsics for improved optimization (for example, vectorization).
2535 // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
2536 // From the C standard draft WG14/N1256:
2537 // "Ideally, fmax would be sensitive to the sign of zero, for example
2538 // fmax(-0.0, +0.0) would return +0; however, implementation in software
2539 // might be impractical."
2540 FastMathFlags FMF = CI->getFastMathFlags();
2541 FMF.setNoSignedZeros();
2542 return copyFlags(Old: *CI, New: B.CreateBinaryIntrinsic(ID: IID, LHS: CI->getArgOperand(i: 0),
2543 RHS: CI->getArgOperand(i: 1), FMFSource: FMF));
2544}
2545
2546Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
2547 Function *LogFn = Log->getCalledFunction();
2548 StringRef LogNm = LogFn->getName();
2549 Intrinsic::ID LogID = LogFn->getIntrinsicID();
2550 Module *Mod = Log->getModule();
2551 Type *Ty = Log->getType();
2552
2553 if (UnsafeFPShrink && hasFloatVersion(M: Mod, FuncName: LogNm))
2554 if (Value *Ret = optimizeUnaryDoubleFP(CI: Log, B, TLI, isPrecise: true))
2555 return Ret;
2556
2557 LibFunc LogLb, ExpLb, Exp2Lb, Exp10Lb, PowLb;
2558
2559 // This is only applicable to log(), log2(), log10().
2560 if (TLI->getLibFunc(funcName: LogNm, F&: LogLb)) {
2561 switch (LogLb) {
2562 case LibFunc_logf:
2563 LogID = Intrinsic::log;
2564 ExpLb = LibFunc_expf;
2565 Exp2Lb = LibFunc_exp2f;
2566 Exp10Lb = LibFunc_exp10f;
2567 PowLb = LibFunc_powf;
2568 break;
2569 case LibFunc_log:
2570 LogID = Intrinsic::log;
2571 ExpLb = LibFunc_exp;
2572 Exp2Lb = LibFunc_exp2;
2573 Exp10Lb = LibFunc_exp10;
2574 PowLb = LibFunc_pow;
2575 break;
2576 case LibFunc_logl:
2577 LogID = Intrinsic::log;
2578 ExpLb = LibFunc_expl;
2579 Exp2Lb = LibFunc_exp2l;
2580 Exp10Lb = LibFunc_exp10l;
2581 PowLb = LibFunc_powl;
2582 break;
2583 case LibFunc_log2f:
2584 LogID = Intrinsic::log2;
2585 ExpLb = LibFunc_expf;
2586 Exp2Lb = LibFunc_exp2f;
2587 Exp10Lb = LibFunc_exp10f;
2588 PowLb = LibFunc_powf;
2589 break;
2590 case LibFunc_log2:
2591 LogID = Intrinsic::log2;
2592 ExpLb = LibFunc_exp;
2593 Exp2Lb = LibFunc_exp2;
2594 Exp10Lb = LibFunc_exp10;
2595 PowLb = LibFunc_pow;
2596 break;
2597 case LibFunc_log2l:
2598 LogID = Intrinsic::log2;
2599 ExpLb = LibFunc_expl;
2600 Exp2Lb = LibFunc_exp2l;
2601 Exp10Lb = LibFunc_exp10l;
2602 PowLb = LibFunc_powl;
2603 break;
2604 case LibFunc_log10f:
2605 LogID = Intrinsic::log10;
2606 ExpLb = LibFunc_expf;
2607 Exp2Lb = LibFunc_exp2f;
2608 Exp10Lb = LibFunc_exp10f;
2609 PowLb = LibFunc_powf;
2610 break;
2611 case LibFunc_log10:
2612 LogID = Intrinsic::log10;
2613 ExpLb = LibFunc_exp;
2614 Exp2Lb = LibFunc_exp2;
2615 Exp10Lb = LibFunc_exp10;
2616 PowLb = LibFunc_pow;
2617 break;
2618 case LibFunc_log10l:
2619 LogID = Intrinsic::log10;
2620 ExpLb = LibFunc_expl;
2621 Exp2Lb = LibFunc_exp2l;
2622 Exp10Lb = LibFunc_exp10l;
2623 PowLb = LibFunc_powl;
2624 break;
2625 default:
2626 return nullptr;
2627 }
2628
2629 // Convert libcall to intrinsic if the value is known > 0.
2630 bool IsKnownNoErrno = Log->hasNoNaNs() && Log->hasNoInfs();
2631 if (!IsKnownNoErrno) {
2632 SimplifyQuery SQ(DL, TLI, DT, AC, Log, true, true, DC);
2633 KnownFPClass Known = computeKnownFPClass(
2634 V: Log->getOperand(i_nocapture: 0),
2635 InterestedClasses: KnownFPClass::OrderedLessThanZeroMask | fcSubnormal, SQ);
2636 Function *F = Log->getParent()->getParent();
2637 const fltSemantics &FltSem = Ty->getScalarType()->getFltSemantics();
2638 IsKnownNoErrno =
2639 Known.cannotBeOrderedLessThanZero() &&
2640 Known.isKnownNeverLogicalZero(Mode: F->getDenormalMode(FPType: FltSem));
2641 }
2642 if (IsKnownNoErrno) {
2643 Value *NewLog = B.CreateUnaryIntrinsic(ID: LogID, Op: Log->getArgOperand(i: 0), FMFSource: Log);
2644 if (auto *I = dyn_cast<Instruction>(Val: NewLog)) {
2645 I->copyMetadata(SrcInst: *Log);
2646 return copyFlags(Old: *Log, New: I);
2647 }
2648 return NewLog;
2649 }
2650 } else if (LogID == Intrinsic::log || LogID == Intrinsic::log2 ||
2651 LogID == Intrinsic::log10) {
2652 if (Ty->getScalarType()->isFloatTy()) {
2653 ExpLb = LibFunc_expf;
2654 Exp2Lb = LibFunc_exp2f;
2655 Exp10Lb = LibFunc_exp10f;
2656 PowLb = LibFunc_powf;
2657 } else if (Ty->getScalarType()->isDoubleTy()) {
2658 ExpLb = LibFunc_exp;
2659 Exp2Lb = LibFunc_exp2;
2660 Exp10Lb = LibFunc_exp10;
2661 PowLb = LibFunc_pow;
2662 } else
2663 return nullptr;
2664 } else
2665 return nullptr;
2666
2667 // The earlier call must also be 'fast' in order to do these transforms.
2668 CallInst *Arg = dyn_cast<CallInst>(Val: Log->getArgOperand(i: 0));
2669 if (!Log->isFast() || !Arg || !Arg->isFast() || !Arg->hasOneUse())
2670 return nullptr;
2671
2672 IRBuilderBase::FastMathFlagGuard Guard(B);
2673 B.setFastMathFlags(FastMathFlags::getFast());
2674
2675 Intrinsic::ID ArgID = Arg->getIntrinsicID();
2676 LibFunc ArgLb = NotLibFunc;
2677 TLI->getLibFunc(CB: *Arg, F&: ArgLb);
2678
2679 // log(pow(x,y)) -> y*log(x)
2680 AttributeList NoAttrs;
2681 if (ArgLb == PowLb || ArgID == Intrinsic::pow || ArgID == Intrinsic::powi) {
2682 Value *LogX =
2683 Log->doesNotAccessMemory()
2684 ? B.CreateUnaryIntrinsic(ID: LogID, Op: Arg->getOperand(i_nocapture: 0), FMFSource: nullptr, Name: "log")
2685 : emitUnaryFloatFnCall(Op: Arg->getOperand(i_nocapture: 0), TLI, Name: LogNm, B, Attrs: NoAttrs);
2686 Value *Y = Arg->getArgOperand(i: 1);
2687 // Cast exponent to FP if integer.
2688 if (ArgID == Intrinsic::powi)
2689 Y = B.CreateSIToFP(V: Y, DestTy: Ty, Name: "cast");
2690 Value *MulY = B.CreateFMul(L: Y, R: LogX, Name: "mul");
2691 // Since pow() may have side effects, e.g. errno,
2692 // dead code elimination may not be trusted to remove it.
2693 substituteInParent(I: Arg, With: MulY);
2694 return MulY;
2695 }
2696
2697 // log(exp{,2,10}(y)) -> y*log({e,2,10})
2698 // TODO: There is no exp10() intrinsic yet.
2699 if (ArgLb == ExpLb || ArgLb == Exp2Lb || ArgLb == Exp10Lb ||
2700 ArgID == Intrinsic::exp || ArgID == Intrinsic::exp2) {
2701 Constant *Eul;
2702 if (ArgLb == ExpLb || ArgID == Intrinsic::exp)
2703 // FIXME: Add more precise value of e for long double.
2704 Eul = ConstantFP::get(Ty: Log->getType(), V: numbers::e);
2705 else if (ArgLb == Exp2Lb || ArgID == Intrinsic::exp2)
2706 Eul = ConstantFP::get(Ty: Log->getType(), V: 2.0);
2707 else
2708 Eul = ConstantFP::get(Ty: Log->getType(), V: 10.0);
2709 Value *LogE = Log->doesNotAccessMemory()
2710 ? B.CreateUnaryIntrinsic(ID: LogID, Op: Eul, FMFSource: nullptr, Name: "log")
2711 : emitUnaryFloatFnCall(Op: Eul, TLI, Name: LogNm, B, Attrs: NoAttrs);
2712 Value *MulY = B.CreateFMul(L: Arg->getArgOperand(i: 0), R: LogE, Name: "mul");
2713 // Since exp() may have side effects, e.g. errno,
2714 // dead code elimination may not be trusted to remove it.
2715 substituteInParent(I: Arg, With: MulY);
2716 return MulY;
2717 }
2718
2719 return nullptr;
2720}
2721
2722// sqrt(exp(X)) -> exp(X * 0.5)
2723Value *LibCallSimplifier::mergeSqrtToExp(CallInst *CI, IRBuilderBase &B) {
2724 if (!CI->hasAllowReassoc())
2725 return nullptr;
2726
2727 Function *SqrtFn = CI->getCalledFunction();
2728 CallInst *Arg = dyn_cast<CallInst>(Val: CI->getArgOperand(i: 0));
2729 if (!Arg || !Arg->hasAllowReassoc() || !Arg->hasOneUse())
2730 return nullptr;
2731 Intrinsic::ID ArgID = Arg->getIntrinsicID();
2732 LibFunc ArgLb = NotLibFunc;
2733 TLI->getLibFunc(CB: *Arg, F&: ArgLb);
2734
2735 LibFunc SqrtLb, ExpLb, Exp2Lb, Exp10Lb;
2736
2737 if (TLI->getLibFunc(funcName: SqrtFn->getName(), F&: SqrtLb))
2738 switch (SqrtLb) {
2739 case LibFunc_sqrtf:
2740 ExpLb = LibFunc_expf;
2741 Exp2Lb = LibFunc_exp2f;
2742 Exp10Lb = LibFunc_exp10f;
2743 break;
2744 case LibFunc_sqrt:
2745 ExpLb = LibFunc_exp;
2746 Exp2Lb = LibFunc_exp2;
2747 Exp10Lb = LibFunc_exp10;
2748 break;
2749 case LibFunc_sqrtl:
2750 ExpLb = LibFunc_expl;
2751 Exp2Lb = LibFunc_exp2l;
2752 Exp10Lb = LibFunc_exp10l;
2753 break;
2754 default:
2755 return nullptr;
2756 }
2757 else if (SqrtFn->getIntrinsicID() == Intrinsic::sqrt) {
2758 if (CI->getType()->getScalarType()->isFloatTy()) {
2759 ExpLb = LibFunc_expf;
2760 Exp2Lb = LibFunc_exp2f;
2761 Exp10Lb = LibFunc_exp10f;
2762 } else if (CI->getType()->getScalarType()->isDoubleTy()) {
2763 ExpLb = LibFunc_exp;
2764 Exp2Lb = LibFunc_exp2;
2765 Exp10Lb = LibFunc_exp10;
2766 } else
2767 return nullptr;
2768 } else
2769 return nullptr;
2770
2771 if (ArgLb != ExpLb && ArgLb != Exp2Lb && ArgLb != Exp10Lb &&
2772 ArgID != Intrinsic::exp && ArgID != Intrinsic::exp2)
2773 return nullptr;
2774
2775 IRBuilderBase::InsertPointGuard Guard(B);
2776 B.SetInsertPoint(Arg);
2777 auto *ExpOperand = Arg->getOperand(i_nocapture: 0);
2778 auto *FMul =
2779 B.CreateFMulFMF(L: ExpOperand, R: ConstantFP::get(Ty: ExpOperand->getType(), V: 0.5),
2780 FMFSource: CI, Name: "merged.sqrt");
2781
2782 Arg->setOperand(i_nocapture: 0, Val_nocapture: FMul);
2783 return Arg;
2784}
2785
2786Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
2787 Module *M = CI->getModule();
2788 Function *Callee = CI->getCalledFunction();
2789 Value *Ret = nullptr;
2790 // TODO: Once we have a way (other than checking for the existince of the
2791 // libcall) to tell whether our target can lower @llvm.sqrt, relax the
2792 // condition below.
2793 if (isLibFuncEmittable(M, TLI, TheLibFunc: LibFunc_sqrtf) &&
2794 (Callee->getName() == "sqrt" ||
2795 Callee->getIntrinsicID() == Intrinsic::sqrt))
2796 Ret = optimizeUnaryDoubleFP(CI, B, TLI, isPrecise: true);
2797
2798 if (Value *Opt = mergeSqrtToExp(CI, B))
2799 return Opt;
2800
2801 if (!CI->isFast())
2802 return Ret;
2803
2804 Instruction *I = dyn_cast<Instruction>(Val: CI->getArgOperand(i: 0));
2805 if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
2806 return Ret;
2807
2808 // We're looking for a repeated factor in a multiplication tree,
2809 // so we can do this fold: sqrt(x * x) -> fabs(x);
2810 // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
2811 Value *Op0 = I->getOperand(i: 0);
2812 Value *Op1 = I->getOperand(i: 1);
2813 Value *RepeatOp = nullptr;
2814 Value *OtherOp = nullptr;
2815 if (Op0 == Op1) {
2816 // Simple match: the operands of the multiply are identical.
2817 RepeatOp = Op0;
2818 } else {
2819 // Look for a more complicated pattern: one of the operands is itself
2820 // a multiply, so search for a common factor in that multiply.
2821 // Note: We don't bother looking any deeper than this first level or for
2822 // variations of this pattern because instcombine's visitFMUL and/or the
2823 // reassociation pass should give us this form.
2824 Value *MulOp;
2825 if (match(V: Op0, P: m_FMul(L: m_Value(V&: MulOp), R: m_Deferred(V: MulOp))) &&
2826 cast<Instruction>(Val: Op0)->isFast()) {
2827 // Pattern: sqrt((x * x) * z)
2828 RepeatOp = MulOp;
2829 OtherOp = Op1;
2830 } else if (match(V: Op1, P: m_FMul(L: m_Value(V&: MulOp), R: m_Deferred(V: MulOp))) &&
2831 cast<Instruction>(Val: Op1)->isFast()) {
2832 // Pattern: sqrt(z * (x * x))
2833 RepeatOp = MulOp;
2834 OtherOp = Op0;
2835 }
2836 }
2837 if (!RepeatOp)
2838 return Ret;
2839
2840 // Fast math flags for any created instructions should match the sqrt
2841 // and multiply.
2842
2843 // If we found a repeated factor, hoist it out of the square root and
2844 // replace it with the fabs of that factor.
2845 Value *FabsCall = B.CreateFAbs(V: RepeatOp, FMFSource: I, Name: "fabs");
2846 if (OtherOp) {
2847 // If we found a non-repeated factor, we still need to get its square
2848 // root. We then multiply that by the value that was simplified out
2849 // of the square root calculation.
2850 Value *SqrtCall =
2851 B.CreateUnaryIntrinsic(ID: Intrinsic::sqrt, Op: OtherOp, FMFSource: I, Name: "sqrt");
2852 return copyFlags(Old: *CI, New: B.CreateFMulFMF(L: FabsCall, R: SqrtCall, FMFSource: I));
2853 }
2854 return copyFlags(Old: *CI, New: FabsCall);
2855}
2856
2857Value *LibCallSimplifier::optimizeFMod(CallInst *CI, IRBuilderBase &B) {
2858
2859 // fmod(x,y) sets errno if y == 0 or x == +/-inf. frem does not set errno,
2860 // so the fold is valid only when we can prove fmod wouldn't either.
2861 bool IsNoErrno = CI->hasNoNaNs();
2862 if (!IsNoErrno) {
2863 SimplifyQuery SQ(DL, TLI, DT, AC, CI, true, true, DC);
2864 KnownFPClass Known0 = computeKnownFPClass(V: CI->getOperand(i_nocapture: 0), InterestedClasses: fcInf, SQ);
2865 if (Known0.isKnownNeverInfinity()) {
2866 KnownFPClass Known1 =
2867 computeKnownFPClass(V: CI->getOperand(i_nocapture: 1), InterestedClasses: fcZero | fcSubnormal, SQ);
2868 Function *F = CI->getParent()->getParent();
2869 const fltSemantics &FltSem =
2870 CI->getType()->getScalarType()->getFltSemantics();
2871 IsNoErrno = Known1.isKnownNeverLogicalZero(Mode: F->getDenormalMode(FPType: FltSem));
2872 }
2873 }
2874
2875 if (IsNoErrno)
2876 return B.CreateFRemFMF(L: CI->getOperand(i_nocapture: 0), R: CI->getOperand(i_nocapture: 1), FMFSource: CI);
2877 return nullptr;
2878}
2879
2880Value *LibCallSimplifier::optimizeTrigInversionPairs(CallInst *CI,
2881 IRBuilderBase &B) {
2882 Module *M = CI->getModule();
2883 Function *Callee = CI->getCalledFunction();
2884 Value *Ret = nullptr;
2885 StringRef Name = Callee->getName();
2886 if (UnsafeFPShrink &&
2887 (Name == "tan" || Name == "atanh" || Name == "sinh" || Name == "cosh" ||
2888 Name == "asinh") &&
2889 hasFloatVersion(M, FuncName: Name))
2890 Ret = optimizeUnaryDoubleFP(CI, B, TLI, isPrecise: true);
2891
2892 Value *Op1 = CI->getArgOperand(i: 0);
2893 auto *OpC = dyn_cast<CallInst>(Val: Op1);
2894 if (!OpC)
2895 return Ret;
2896
2897 // Both calls must be 'fast' in order to remove them.
2898 if (!CI->isFast() || !OpC->isFast())
2899 return Ret;
2900
2901 // tan(atan(x)) -> x
2902 // atanh(tanh(x)) -> x
2903 // sinh(asinh(x)) -> x
2904 // asinh(sinh(x)) -> x
2905 // cosh(acosh(x)) -> x
2906 LibFunc Func;
2907 Function *F = OpC->getCalledFunction();
2908 if (F && TLI->getLibFunc(funcName: F->getName(), F&: Func) &&
2909 isLibFuncEmittable(M, TLI, TheLibFunc: Func)) {
2910 LibFunc inverseFunc = llvm::StringSwitch<LibFunc>(Callee->getName())
2911 .Case(S: "tan", Value: LibFunc_atan)
2912 .Case(S: "atanh", Value: LibFunc_tanh)
2913 .Case(S: "sinh", Value: LibFunc_asinh)
2914 .Case(S: "cosh", Value: LibFunc_acosh)
2915 .Case(S: "tanf", Value: LibFunc_atanf)
2916 .Case(S: "atanhf", Value: LibFunc_tanhf)
2917 .Case(S: "sinhf", Value: LibFunc_asinhf)
2918 .Case(S: "coshf", Value: LibFunc_acoshf)
2919 .Case(S: "tanl", Value: LibFunc_atanl)
2920 .Case(S: "atanhl", Value: LibFunc_tanhl)
2921 .Case(S: "sinhl", Value: LibFunc_asinhl)
2922 .Case(S: "coshl", Value: LibFunc_acoshl)
2923 .Case(S: "asinh", Value: LibFunc_sinh)
2924 .Case(S: "asinhf", Value: LibFunc_sinhf)
2925 .Case(S: "asinhl", Value: LibFunc_sinhl)
2926 .Default(Value: NotLibFunc); // Used as error value
2927 if (Func == inverseFunc)
2928 Ret = OpC->getArgOperand(i: 0);
2929 }
2930 return Ret;
2931}
2932
2933static bool isTrigLibCall(CallInst *CI) {
2934 // We can only hope to do anything useful if we can ignore things like errno
2935 // and floating-point exceptions.
2936 // We already checked the prototype.
2937 return CI->doesNotThrow() && CI->doesNotAccessMemory();
2938}
2939
2940static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
2941 bool UseFloat, Value *&Sin, Value *&Cos,
2942 Value *&SinCos, const TargetLibraryInfo *TLI) {
2943 Module *M = OrigCallee->getParent();
2944 Type *ArgTy = Arg->getType();
2945 Type *ResTy;
2946 StringRef Name;
2947
2948 Triple T(OrigCallee->getParent()->getTargetTriple());
2949 if (UseFloat) {
2950 Name = "__sincospif_stret";
2951
2952 assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
2953 // x86_64 can't use {float, float} since that would be returned in both
2954 // xmm0 and xmm1, which isn't what a real struct would do.
2955 ResTy = T.getArch() == Triple::x86_64
2956 ? static_cast<Type *>(FixedVectorType::get(ElementType: ArgTy, NumElts: 2))
2957 : static_cast<Type *>(StructType::get(elt1: ArgTy, elts: ArgTy));
2958 } else {
2959 Name = "__sincospi_stret";
2960 ResTy = StructType::get(elt1: ArgTy, elts: ArgTy);
2961 }
2962
2963 if (!isLibFuncEmittable(M, TLI, Name))
2964 return false;
2965 LibFunc TheLibFunc;
2966 TLI->getLibFunc(funcName: Name, F&: TheLibFunc);
2967 FunctionCallee Callee = getOrInsertLibFunc(
2968 M, TLI: *TLI, TheLibFunc, AttributeList: OrigCallee->getAttributes(), RetTy: ResTy, Args: ArgTy);
2969
2970 if (Instruction *ArgInst = dyn_cast<Instruction>(Val: Arg)) {
2971 // If the argument is an instruction, it must dominate all uses so put our
2972 // sincos call there.
2973 B.SetInsertPoint(TheBB: ArgInst->getParent(), IP: ++ArgInst->getIterator());
2974 } else {
2975 // Otherwise (e.g. for a constant) the beginning of the function is as
2976 // good a place as any.
2977 BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
2978 B.SetInsertPoint(TheBB: &EntryBB, IP: EntryBB.begin());
2979 }
2980
2981 SinCos = B.CreateCall(Callee, Args: Arg, Name: "sincospi");
2982
2983 if (SinCos->getType()->isStructTy()) {
2984 Sin = B.CreateExtractValue(Agg: SinCos, Idxs: 0, Name: "sinpi");
2985 Cos = B.CreateExtractValue(Agg: SinCos, Idxs: 1, Name: "cospi");
2986 } else {
2987 Sin = B.CreateExtractElement(Vec: SinCos, Idx: ConstantInt::get(Ty: B.getInt32Ty(), V: 0),
2988 Name: "sinpi");
2989 Cos = B.CreateExtractElement(Vec: SinCos, Idx: ConstantInt::get(Ty: B.getInt32Ty(), V: 1),
2990 Name: "cospi");
2991 }
2992
2993 return true;
2994}
2995
2996static Value *optimizeSymmetricCall(CallInst *CI, bool IsEven,
2997 IRBuilderBase &B) {
2998 Value *X;
2999 Value *Src = CI->getArgOperand(i: 0);
3000
3001 if (match(V: Src, P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X))))) {
3002 auto *Call = B.CreateCall(Callee: CI->getCalledFunction(), Args: {X});
3003 Call->copyFastMathFlags(I: CI);
3004 auto *CallInst = copyFlags(Old: *CI, New: Call);
3005 if (IsEven) {
3006 // Even function: f(-x) = f(x)
3007 return CallInst;
3008 }
3009 // Odd function: f(-x) = -f(x)
3010 return B.CreateFNegFMF(V: CallInst, FMFSource: CI);
3011 }
3012
3013 // Even function: f(abs(x)) = f(x), f(copysign(x, y)) = f(x)
3014 if (IsEven && (match(V: Src, P: m_FAbs(Op0: m_Value(V&: X))) ||
3015 match(V: Src, P: m_CopySign(Op0: m_Value(V&: X), Op1: m_Value())))) {
3016 auto *Call = B.CreateCall(Callee: CI->getCalledFunction(), Args: {X});
3017 Call->copyFastMathFlags(I: CI);
3018 return copyFlags(Old: *CI, New: Call);
3019 }
3020
3021 return nullptr;
3022}
3023
3024Value *LibCallSimplifier::optimizeSymmetric(CallInst *CI, LibFunc Func,
3025 IRBuilderBase &B) {
3026 switch (Func) {
3027 case LibFunc_cos:
3028 case LibFunc_cosf:
3029 case LibFunc_cosl:
3030
3031 case LibFunc_cosh:
3032 case LibFunc_coshf:
3033 case LibFunc_coshl:
3034 return optimizeSymmetricCall(CI, /*IsEven*/ true, B);
3035
3036 case LibFunc_sin:
3037 case LibFunc_sinf:
3038 case LibFunc_sinl:
3039
3040 case LibFunc_sinh:
3041 case LibFunc_sinhf:
3042 case LibFunc_sinhl:
3043
3044 case LibFunc_tan:
3045 case LibFunc_tanf:
3046 case LibFunc_tanl:
3047
3048 case LibFunc_tanh:
3049 case LibFunc_tanhf:
3050 case LibFunc_tanhl:
3051
3052 case LibFunc_erf:
3053 case LibFunc_erff:
3054 case LibFunc_erfl:
3055 return optimizeSymmetricCall(CI, /*IsEven*/ false, B);
3056
3057 default:
3058 return nullptr;
3059 }
3060}
3061
3062Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B) {
3063 // Make sure the prototype is as expected, otherwise the rest of the
3064 // function is probably invalid and likely to abort.
3065 if (!isTrigLibCall(CI))
3066 return nullptr;
3067
3068 Value *Arg = CI->getArgOperand(i: 0);
3069 if (isa<ConstantData>(Val: Arg))
3070 return nullptr;
3071
3072 SmallVector<CallInst *, 1> SinCalls;
3073 SmallVector<CallInst *, 1> CosCalls;
3074 SmallVector<CallInst *, 1> SinCosCalls;
3075
3076 bool IsFloat = Arg->getType()->isFloatTy();
3077
3078 // Look for all compatible sinpi, cospi and sincospi calls with the same
3079 // argument. If there are enough (in some sense) we can make the
3080 // substitution.
3081 Function *F = CI->getFunction();
3082 for (User *U : Arg->users())
3083 classifyArgUse(Val: U, F, IsFloat, SinCalls, CosCalls, SinCosCalls);
3084
3085 // It's only worthwhile if both sinpi and cospi are actually used.
3086 if (SinCalls.empty() || CosCalls.empty())
3087 return nullptr;
3088
3089 Value *Sin, *Cos, *SinCos;
3090 if (!insertSinCosCall(B, OrigCallee: CI->getCalledFunction(), Arg, UseFloat: IsFloat, Sin, Cos,
3091 SinCos, TLI))
3092 return nullptr;
3093
3094 auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
3095 Value *Res) {
3096 for (CallInst *C : Calls)
3097 replaceAllUsesWith(I: C, With: Res);
3098 };
3099
3100 replaceTrigInsts(SinCalls, Sin);
3101 replaceTrigInsts(CosCalls, Cos);
3102 replaceTrigInsts(SinCosCalls, SinCos);
3103
3104 return IsSin ? Sin : Cos;
3105}
3106
3107void LibCallSimplifier::classifyArgUse(
3108 Value *Val, Function *F, bool IsFloat,
3109 SmallVectorImpl<CallInst *> &SinCalls,
3110 SmallVectorImpl<CallInst *> &CosCalls,
3111 SmallVectorImpl<CallInst *> &SinCosCalls) {
3112 auto *CI = dyn_cast<CallInst>(Val);
3113 if (!CI || CI->use_empty())
3114 return;
3115
3116 // Don't consider calls in other functions.
3117 if (CI->getFunction() != F)
3118 return;
3119
3120 Module *M = CI->getModule();
3121 Function *Callee = CI->getCalledFunction();
3122 LibFunc Func;
3123 if (!Callee || !TLI->getLibFunc(FDecl: *Callee, F&: Func) ||
3124 !isLibFuncEmittable(M, TLI, TheLibFunc: Func) ||
3125 !isTrigLibCall(CI))
3126 return;
3127
3128 if (IsFloat) {
3129 if (Func == LibFunc_sinpif)
3130 SinCalls.push_back(Elt: CI);
3131 else if (Func == LibFunc_cospif)
3132 CosCalls.push_back(Elt: CI);
3133 else if (Func == LibFunc_sincospif_stret)
3134 SinCosCalls.push_back(Elt: CI);
3135 } else {
3136 if (Func == LibFunc_sinpi)
3137 SinCalls.push_back(Elt: CI);
3138 else if (Func == LibFunc_cospi)
3139 CosCalls.push_back(Elt: CI);
3140 else if (Func == LibFunc_sincospi_stret)
3141 SinCosCalls.push_back(Elt: CI);
3142 }
3143}
3144
3145/// Constant folds remquo
3146Value *LibCallSimplifier::optimizeRemquo(CallInst *CI, IRBuilderBase &B) {
3147 const APFloat *X, *Y;
3148 if (!match(V: CI->getArgOperand(i: 0), P: m_APFloat(Res&: X)) ||
3149 !match(V: CI->getArgOperand(i: 1), P: m_APFloat(Res&: Y)))
3150 return nullptr;
3151
3152 APFloat::opStatus Status;
3153 APFloat Quot = *X;
3154 Status = Quot.divide(RHS: *Y, RM: APFloat::rmNearestTiesToEven);
3155 if (Status != APFloat::opOK && Status != APFloat::opInexact)
3156 return nullptr;
3157 APFloat Rem = *X;
3158 if (Rem.remainder(RHS: *Y) != APFloat::opOK)
3159 return nullptr;
3160
3161 // TODO: We can only keep at least the three of the last bits of x/y
3162 unsigned IntBW = TLI->getIntSize();
3163 APSInt QuotInt(IntBW, /*isUnsigned=*/false);
3164 bool IsExact;
3165 Status =
3166 Quot.convertToInteger(Result&: QuotInt, RM: APFloat::rmNearestTiesToEven, IsExact: &IsExact);
3167 if (Status != APFloat::opOK && Status != APFloat::opInexact)
3168 return nullptr;
3169
3170 B.CreateAlignedStore(
3171 Val: ConstantInt::getSigned(Ty: B.getIntNTy(N: IntBW), V: QuotInt.getExtValue()),
3172 Ptr: CI->getArgOperand(i: 2), Align: CI->getParamAlign(ArgNo: 2));
3173 return ConstantFP::get(Ty: CI->getType(), V: Rem);
3174}
3175
3176/// Constant folds fdim
3177Value *LibCallSimplifier::optimizeFdim(CallInst *CI, IRBuilderBase &B) {
3178 // Cannot perform the fold unless the call has attribute memory(none)
3179 if (!CI->doesNotAccessMemory())
3180 return nullptr;
3181
3182 // TODO : Handle undef values
3183 // Propagate poison if any
3184 if (isa<PoisonValue>(Val: CI->getArgOperand(i: 0)))
3185 return CI->getArgOperand(i: 0);
3186 if (isa<PoisonValue>(Val: CI->getArgOperand(i: 1)))
3187 return CI->getArgOperand(i: 1);
3188
3189 const APFloat *X, *Y;
3190 // Check if both values are constants
3191 if (!match(V: CI->getArgOperand(i: 0), P: m_APFloat(Res&: X)) ||
3192 !match(V: CI->getArgOperand(i: 1), P: m_APFloat(Res&: Y)))
3193 return nullptr;
3194
3195 // C99 fdim(x, y) = (x > y) ? x - y : +0.
3196 if (X->compare(RHS: *Y) != APFloat::cmpGreaterThan && !X->isNaN() && !Y->isNaN())
3197 return ConstantFP::getZero(Ty: CI->getType());
3198 APFloat Difference = *X;
3199 Difference.subtract(RHS: *Y, RM: RoundingMode::NearestTiesToEven);
3200 return ConstantFP::get(Ty: CI->getType(), V: Difference);
3201}
3202
3203//===----------------------------------------------------------------------===//
3204// Integer Library Call Optimizations
3205//===----------------------------------------------------------------------===//
3206
3207Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilderBase &B) {
3208 // All variants of ffs return int which need not be 32 bits wide.
3209 // ffs{,l,ll}(x) -> x != 0 ? (int)llvm.cttz(x)+1 : 0
3210 Type *RetType = CI->getType();
3211 Value *Op = CI->getArgOperand(i: 0);
3212 Type *ArgType = Op->getType();
3213 Value *V = B.CreateIntrinsic(ID: Intrinsic::cttz, OverloadTypes: {ArgType}, Args: {Op, B.getTrue()},
3214 FMFSource: nullptr, Name: "cttz");
3215 V = B.CreateAdd(LHS: V, RHS: ConstantInt::get(Ty: V->getType(), V: 1));
3216 V = B.CreateIntCast(V, DestTy: RetType, isSigned: false);
3217
3218 Value *Cond = B.CreateICmpNE(LHS: Op, RHS: Constant::getNullValue(Ty: ArgType));
3219 return B.CreateSelect(C: Cond, True: V, False: ConstantInt::get(Ty: RetType, V: 0));
3220}
3221
3222Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilderBase &B) {
3223 // All variants of fls return int which need not be 32 bits wide.
3224 // fls{,l,ll}(x) -> (int)(sizeInBits(x) - llvm.ctlz(x, false))
3225 Value *Op = CI->getArgOperand(i: 0);
3226 Type *ArgType = Op->getType();
3227 Value *V = B.CreateIntrinsic(ID: Intrinsic::ctlz, OverloadTypes: {ArgType}, Args: {Op, B.getFalse()},
3228 FMFSource: nullptr, Name: "ctlz");
3229 V = B.CreateSub(LHS: ConstantInt::get(Ty: V->getType(), V: ArgType->getIntegerBitWidth()),
3230 RHS: V);
3231 return B.CreateIntCast(V, DestTy: CI->getType(), isSigned: false);
3232}
3233
3234Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilderBase &B) {
3235 // abs(x) -> x <s 0 ? -x : x
3236 // The negation has 'nsw' because abs of INT_MIN is undefined.
3237 Value *X = CI->getArgOperand(i: 0);
3238 Value *IsNeg = B.CreateIsNeg(Arg: X);
3239 Value *NegX = B.CreateNSWNeg(V: X, Name: "neg");
3240 return B.CreateSelect(C: IsNeg, True: NegX, False: X);
3241}
3242
3243Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilderBase &B) {
3244 // isdigit(c) -> (c-'0') <u 10
3245 Value *Op = CI->getArgOperand(i: 0);
3246 Type *ArgType = Op->getType();
3247 Op = B.CreateSub(LHS: Op, RHS: ConstantInt::get(Ty: ArgType, V: '0'), Name: "isdigittmp");
3248 Op = B.CreateICmpULT(LHS: Op, RHS: ConstantInt::get(Ty: ArgType, V: 10), Name: "isdigit");
3249 return B.CreateZExt(V: Op, DestTy: CI->getType());
3250}
3251
3252Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilderBase &B) {
3253 // isascii(c) -> c <u 128
3254 Value *Op = CI->getArgOperand(i: 0);
3255 Type *ArgType = Op->getType();
3256 Op = B.CreateICmpULT(LHS: Op, RHS: ConstantInt::get(Ty: ArgType, V: 128), Name: "isascii");
3257 return B.CreateZExt(V: Op, DestTy: CI->getType());
3258}
3259
3260Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilderBase &B) {
3261 // toascii(c) -> c & 0x7f
3262 return B.CreateAnd(LHS: CI->getArgOperand(i: 0),
3263 RHS: ConstantInt::get(Ty: CI->getType(), V: 0x7F));
3264}
3265
3266// Fold calls to atoi, atol, and atoll.
3267Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) {
3268 StringRef Str;
3269 if (!getConstantStringInfo(V: CI->getArgOperand(i: 0), Str))
3270 return nullptr;
3271
3272 return convertStrToInt(CI, Str, EndPtr: nullptr, Base: 10, /*AsSigned=*/true, B);
3273}
3274
3275// Fold calls to strtol, strtoll, strtoul, and strtoull.
3276Value *LibCallSimplifier::optimizeStrToInt(CallInst *CI, IRBuilderBase &B,
3277 bool AsSigned) {
3278 Value *EndPtr = CI->getArgOperand(i: 1);
3279 if (isa<ConstantPointerNull>(Val: EndPtr)) {
3280 // With a null EndPtr, this function won't capture the main argument.
3281 // It would be readonly too, except that it still may write to errno.
3282 CI->addParamAttr(ArgNo: 0, Attr: Attribute::getWithCaptureInfo(Context&: CI->getContext(),
3283 CI: CaptureInfo::none()));
3284 EndPtr = nullptr;
3285 } else if (!isKnownNonZero(V: EndPtr, Q: DL))
3286 return nullptr;
3287
3288 StringRef Str;
3289 if (!getConstantStringInfo(V: CI->getArgOperand(i: 0), Str))
3290 return nullptr;
3291
3292 if (ConstantInt *CInt = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2))) {
3293 return convertStrToInt(CI, Str, EndPtr, Base: CInt->getSExtValue(), AsSigned, B);
3294 }
3295
3296 return nullptr;
3297}
3298
3299//===----------------------------------------------------------------------===//
3300// Formatting and IO Library Call Optimizations
3301//===----------------------------------------------------------------------===//
3302
3303static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg);
3304
3305Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilderBase &B,
3306 int StreamArg) {
3307 Function *Callee = CI->getCalledFunction();
3308 // Error reporting calls should be cold, mark them as such.
3309 // This applies even to non-builtin calls: it is only a hint and applies to
3310 // functions that the frontend might not understand as builtins.
3311
3312 // This heuristic was suggested in:
3313 // Improving Static Branch Prediction in a Compiler
3314 // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
3315 // Proceedings of PACT'98, Oct. 1998, IEEE
3316 if (!CI->hasFnAttr(Kind: Attribute::Cold) &&
3317 isReportingError(Callee, CI, StreamArg)) {
3318 CI->addFnAttr(Kind: Attribute::Cold);
3319 }
3320
3321 return nullptr;
3322}
3323
3324static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
3325 if (!Callee || !Callee->isDeclaration())
3326 return false;
3327
3328 if (StreamArg < 0)
3329 return true;
3330
3331 // These functions might be considered cold, but only if their stream
3332 // argument is stderr.
3333
3334 if (StreamArg >= (int)CI->arg_size())
3335 return false;
3336 LoadInst *LI = dyn_cast<LoadInst>(Val: CI->getArgOperand(i: StreamArg));
3337 if (!LI)
3338 return false;
3339 GlobalVariable *GV = dyn_cast<GlobalVariable>(Val: LI->getPointerOperand());
3340 if (!GV || !GV->isDeclaration())
3341 return false;
3342 return GV->getName() == "stderr";
3343}
3344
3345Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
3346 // Check for a fixed format string.
3347 StringRef FormatStr;
3348 if (!getConstantStringInfo(V: CI->getArgOperand(i: 0), Str&: FormatStr))
3349 return nullptr;
3350
3351 // Empty format string -> noop.
3352 if (FormatStr.empty()) // Tolerate printf's declared void.
3353 return CI->use_empty() ? (Value *)CI : ConstantInt::get(Ty: CI->getType(), V: 0);
3354
3355 // Do not do any of the following transformations if the printf return value
3356 // is used, in general the printf return value is not compatible with either
3357 // putchar() or puts().
3358 if (!CI->use_empty())
3359 return nullptr;
3360
3361 Type *IntTy = CI->getType();
3362 // printf("x") -> putchar('x'), even for "%" and "%%".
3363 if (FormatStr.size() == 1 || FormatStr == "%%") {
3364 // Convert the character to unsigned char before passing it to putchar
3365 // to avoid host-specific sign extension in the IR. Putchar converts
3366 // it to unsigned char regardless.
3367 Value *IntChar = ConstantInt::get(Ty: IntTy, V: (unsigned char)FormatStr[0]);
3368 return copyFlags(Old: *CI, New: emitPutChar(Char: IntChar, B, TLI));
3369 }
3370
3371 // Try to remove call or emit putchar/puts.
3372 if (FormatStr == "%s" && CI->arg_size() > 1) {
3373 StringRef OperandStr;
3374 if (!getConstantStringInfo(V: CI->getOperand(i_nocapture: 1), Str&: OperandStr))
3375 return nullptr;
3376 // printf("%s", "") --> NOP
3377 if (OperandStr.empty())
3378 return (Value *)CI;
3379 // printf("%s", "a") --> putchar('a')
3380 if (OperandStr.size() == 1) {
3381 // Convert the character to unsigned char before passing it to putchar
3382 // to avoid host-specific sign extension in the IR. Putchar converts
3383 // it to unsigned char regardless.
3384 Value *IntChar = ConstantInt::get(Ty: IntTy, V: (unsigned char)OperandStr[0]);
3385 return copyFlags(Old: *CI, New: emitPutChar(Char: IntChar, B, TLI));
3386 }
3387 // printf("%s", str"\n") --> puts(str)
3388 if (OperandStr.back() == '\n') {
3389 if (!isLibFuncEmittable(M: CI->getModule(), TLI, TheLibFunc: LibFunc_puts))
3390 return nullptr;
3391 OperandStr = OperandStr.drop_back();
3392 Value *GV = B.CreateGlobalString(Str: OperandStr, Name: "str");
3393 return copyFlags(Old: *CI, New: emitPutS(Str: GV, B, TLI));
3394 }
3395 return nullptr;
3396 }
3397
3398 // printf("foo\n") --> puts("foo")
3399 if (FormatStr.back() == '\n' &&
3400 !FormatStr.contains(C: '%')) { // No format characters.
3401 if (!isLibFuncEmittable(M: CI->getModule(), TLI, TheLibFunc: LibFunc_puts))
3402 return nullptr;
3403 // Create a string literal with no \n on it. We expect the constant merge
3404 // pass to be run after this pass, to merge duplicate strings.
3405 FormatStr = FormatStr.drop_back();
3406 Value *GV = B.CreateGlobalString(Str: FormatStr, Name: "str");
3407 return copyFlags(Old: *CI, New: emitPutS(Str: GV, B, TLI));
3408 }
3409
3410 // Optimize specific format strings.
3411 // printf("%c", chr) --> putchar(chr)
3412 if (FormatStr == "%c" && CI->arg_size() > 1 &&
3413 CI->getArgOperand(i: 1)->getType()->isIntegerTy()) {
3414 // Convert the argument to the type expected by putchar, i.e., int, which
3415 // need not be 32 bits wide but which is the same as printf's return type.
3416 Value *IntChar = B.CreateIntCast(V: CI->getArgOperand(i: 1), DestTy: IntTy, isSigned: false);
3417 return copyFlags(Old: *CI, New: emitPutChar(Char: IntChar, B, TLI));
3418 }
3419
3420 // printf("%s\n", str) --> puts(str)
3421 if (FormatStr == "%s\n" && CI->arg_size() > 1 &&
3422 CI->getArgOperand(i: 1)->getType()->isPointerTy())
3423 return copyFlags(Old: *CI, New: emitPutS(Str: CI->getArgOperand(i: 1), B, TLI));
3424 return nullptr;
3425}
3426
3427Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilderBase &B) {
3428
3429 Module *M = CI->getModule();
3430 Function *Callee = CI->getCalledFunction();
3431 FunctionType *FT = Callee->getFunctionType();
3432 if (Value *V = optimizePrintFString(CI, B)) {
3433 return V;
3434 }
3435
3436 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
3437
3438 // printf(format, ...) -> iprintf(format, ...) if no floating point
3439 // arguments.
3440 if (isLibFuncEmittable(M, TLI, TheLibFunc: LibFunc_iprintf) &&
3441 !callHasFloatingPointArgument(CI)) {
3442 FunctionCallee IPrintFFn = getOrInsertLibFunc(M, TLI: *TLI, TheLibFunc: LibFunc_iprintf, T: FT,
3443 AttributeList: Callee->getAttributes());
3444 CallInst *New = cast<CallInst>(Val: CI->clone());
3445 New->setCalledFunction(IPrintFFn);
3446 B.Insert(I: New);
3447 return New;
3448 }
3449
3450 // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
3451 // arguments.
3452 if (isLibFuncEmittable(M, TLI, TheLibFunc: LibFunc_small_printf) &&
3453 !callHasFP128Argument(CI)) {
3454 auto SmallPrintFFn = getOrInsertLibFunc(M, TLI: *TLI, TheLibFunc: LibFunc_small_printf, T: FT,
3455 AttributeList: Callee->getAttributes());
3456 CallInst *New = cast<CallInst>(Val: CI->clone());
3457 New->setCalledFunction(SmallPrintFFn);
3458 B.Insert(I: New);
3459 return New;
3460 }
3461
3462 return nullptr;
3463}
3464
3465Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
3466 IRBuilderBase &B) {
3467 // Check for a fixed format string.
3468 StringRef FormatStr;
3469 if (!getConstantStringInfo(V: CI->getArgOperand(i: 1), Str&: FormatStr))
3470 return nullptr;
3471
3472 // If we just have a format string (nothing else crazy) transform it.
3473 Value *Dest = CI->getArgOperand(i: 0);
3474 if (CI->arg_size() == 2) {
3475 // Make sure there's no % in the constant array. We could try to handle
3476 // %% -> % in the future if we cared.
3477 if (FormatStr.contains(C: '%'))
3478 return nullptr; // we found a format specifier, bail out.
3479
3480 // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
3481 B.CreateMemCpy(Dst: Dest, DstAlign: Align(1), Src: CI->getArgOperand(i: 1), SrcAlign: Align(1),
3482 // Copy the null byte.
3483 Size: TLI->getAsSizeT(V: FormatStr.size() + 1, M: *CI->getModule()));
3484 return ConstantInt::get(Ty: CI->getType(), V: FormatStr.size());
3485 }
3486
3487 // The remaining optimizations require the format string to be "%s" or "%c"
3488 // and have an extra operand.
3489 if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
3490 return nullptr;
3491
3492 // Decode the second character of the format string.
3493 if (FormatStr[1] == 'c') {
3494 // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
3495 if (!CI->getArgOperand(i: 2)->getType()->isIntegerTy())
3496 return nullptr;
3497 Value *V = B.CreateTrunc(V: CI->getArgOperand(i: 2), DestTy: B.getInt8Ty(), Name: "char");
3498 Value *Ptr = Dest;
3499 B.CreateStore(Val: V, Ptr);
3500 Ptr = B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr, IdxList: B.getInt32(C: 1), Name: "nul");
3501 B.CreateStore(Val: B.getInt8(C: 0), Ptr);
3502
3503 return ConstantInt::get(Ty: CI->getType(), V: 1);
3504 }
3505
3506 if (FormatStr[1] == 's') {
3507 // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
3508 // strlen(str)+1)
3509 if (!CI->getArgOperand(i: 2)->getType()->isPointerTy())
3510 return nullptr;
3511
3512 if (CI->use_empty())
3513 // sprintf(dest, "%s", str) -> strcpy(dest, str)
3514 return copyFlags(Old: *CI, New: emitStrCpy(Dst: Dest, Src: CI->getArgOperand(i: 2), B, TLI));
3515
3516 uint64_t SrcLen = GetStringLength(V: CI->getArgOperand(i: 2));
3517 if (SrcLen) {
3518 B.CreateMemCpy(Dst: Dest, DstAlign: Align(1), Src: CI->getArgOperand(i: 2), SrcAlign: Align(1),
3519 Size: TLI->getAsSizeT(V: SrcLen, M: *CI->getModule()));
3520 // Returns total number of characters written without null-character.
3521 return ConstantInt::get(Ty: CI->getType(), V: SrcLen - 1);
3522 } else if (Value *V = emitStpCpy(Dst: Dest, Src: CI->getArgOperand(i: 2), B, TLI)) {
3523 // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
3524 Value *PtrDiff = B.CreatePtrDiff(LHS: V, RHS: Dest);
3525 return B.CreateIntCast(V: PtrDiff, DestTy: CI->getType(), isSigned: false);
3526 }
3527
3528 if (llvm::shouldOptimizeForSize(BB: CI->getParent(), PSI, BFI,
3529 QueryType: PGSOQueryType::IRPass))
3530 return nullptr;
3531
3532 Value *Len = emitStrLen(Ptr: CI->getArgOperand(i: 2), B, DL, TLI);
3533 if (!Len)
3534 return nullptr;
3535 Value *IncLen =
3536 B.CreateAdd(LHS: Len, RHS: ConstantInt::get(Ty: Len->getType(), V: 1), Name: "leninc");
3537 B.CreateMemCpy(Dst: Dest, DstAlign: Align(1), Src: CI->getArgOperand(i: 2), SrcAlign: Align(1), Size: IncLen);
3538
3539 // The sprintf result is the unincremented number of bytes in the string.
3540 return B.CreateIntCast(V: Len, DestTy: CI->getType(), isSigned: false);
3541 }
3542 return nullptr;
3543}
3544
3545Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilderBase &B) {
3546 Module *M = CI->getModule();
3547 Function *Callee = CI->getCalledFunction();
3548 FunctionType *FT = Callee->getFunctionType();
3549 if (Value *V = optimizeSPrintFString(CI, B)) {
3550 return V;
3551 }
3552
3553 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: {0, 1});
3554
3555 // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
3556 // point arguments.
3557 if (isLibFuncEmittable(M, TLI, TheLibFunc: LibFunc_siprintf) &&
3558 !callHasFloatingPointArgument(CI)) {
3559 FunctionCallee SIPrintFFn = getOrInsertLibFunc(M, TLI: *TLI, TheLibFunc: LibFunc_siprintf,
3560 T: FT, AttributeList: Callee->getAttributes());
3561 CallInst *New = cast<CallInst>(Val: CI->clone());
3562 New->setCalledFunction(SIPrintFFn);
3563 B.Insert(I: New);
3564 return New;
3565 }
3566
3567 // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
3568 // floating point arguments.
3569 if (isLibFuncEmittable(M, TLI, TheLibFunc: LibFunc_small_sprintf) &&
3570 !callHasFP128Argument(CI)) {
3571 auto SmallSPrintFFn = getOrInsertLibFunc(M, TLI: *TLI, TheLibFunc: LibFunc_small_sprintf, T: FT,
3572 AttributeList: Callee->getAttributes());
3573 CallInst *New = cast<CallInst>(Val: CI->clone());
3574 New->setCalledFunction(SmallSPrintFFn);
3575 B.Insert(I: New);
3576 return New;
3577 }
3578
3579 return nullptr;
3580}
3581
3582// Transform an snprintf call CI with the bound N to format the string Str
3583// either to a call to memcpy, or to single character a store, or to nothing,
3584// and fold the result to a constant. A nonnull StrArg refers to the string
3585// argument being formatted. Otherwise the call is one with N < 2 and
3586// the "%c" directive to format a single character.
3587Value *LibCallSimplifier::emitSnPrintfMemCpy(CallInst *CI, Value *StrArg,
3588 StringRef Str, uint64_t N,
3589 IRBuilderBase &B) {
3590 assert(StrArg || (N < 2 && Str.size() == 1));
3591
3592 unsigned IntBits = TLI->getIntSize();
3593 uint64_t IntMax = maxIntN(N: IntBits);
3594 if (Str.size() > IntMax)
3595 // Bail if the string is longer than INT_MAX. POSIX requires
3596 // implementations to set errno to EOVERFLOW in this case, in
3597 // addition to when N is larger than that (checked by the caller).
3598 return nullptr;
3599
3600 Value *StrLen = ConstantInt::get(Ty: CI->getType(), V: Str.size());
3601 if (N == 0)
3602 return StrLen;
3603
3604 // Set to the number of bytes to copy fron StrArg which is also
3605 // the offset of the terinating nul.
3606 uint64_t NCopy;
3607 if (N > Str.size())
3608 // Copy the full string, including the terminating nul (which must
3609 // be present regardless of the bound).
3610 NCopy = Str.size() + 1;
3611 else
3612 NCopy = N - 1;
3613
3614 Value *DstArg = CI->getArgOperand(i: 0);
3615 if (NCopy && StrArg)
3616 // Transform the call to lvm.memcpy(dst, fmt, N).
3617 copyFlags(Old: *CI, New: B.CreateMemCpy(Dst: DstArg, DstAlign: Align(1), Src: StrArg, SrcAlign: Align(1),
3618 Size: TLI->getAsSizeT(V: NCopy, M: *CI->getModule())));
3619
3620 if (N > Str.size())
3621 // Return early when the whole format string, including the final nul,
3622 // has been copied.
3623 return StrLen;
3624
3625 // Otherwise, when truncating the string append a terminating nul.
3626 Type *Int8Ty = B.getInt8Ty();
3627 Value *NulOff = B.getIntN(N: IntBits, C: NCopy);
3628 Value *DstEnd = B.CreateInBoundsGEP(Ty: Int8Ty, Ptr: DstArg, IdxList: NulOff, Name: "endptr");
3629 B.CreateStore(Val: ConstantInt::get(Ty: Int8Ty, V: 0), Ptr: DstEnd);
3630 return StrLen;
3631}
3632
3633Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
3634 IRBuilderBase &B) {
3635 // Check for size
3636 ConstantInt *Size = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 1));
3637 if (!Size)
3638 return nullptr;
3639
3640 uint64_t N = Size->getZExtValue();
3641 uint64_t IntMax = maxIntN(N: TLI->getIntSize());
3642 if (N > IntMax)
3643 // Bail if the bound exceeds INT_MAX. POSIX requires implementations
3644 // to set errno to EOVERFLOW in this case.
3645 return nullptr;
3646
3647 Value *DstArg = CI->getArgOperand(i: 0);
3648 Value *FmtArg = CI->getArgOperand(i: 2);
3649
3650 // Check for a fixed format string.
3651 StringRef FormatStr;
3652 if (!getConstantStringInfo(V: FmtArg, Str&: FormatStr))
3653 return nullptr;
3654
3655 // If we just have a format string (nothing else crazy) transform it.
3656 if (CI->arg_size() == 3) {
3657 if (FormatStr.contains(C: '%'))
3658 // Bail if the format string contains a directive and there are
3659 // no arguments. We could handle "%%" in the future.
3660 return nullptr;
3661
3662 return emitSnPrintfMemCpy(CI, StrArg: FmtArg, Str: FormatStr, N, B);
3663 }
3664
3665 // The remaining optimizations require the format string to be "%s" or "%c"
3666 // and have an extra operand.
3667 if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() != 4)
3668 return nullptr;
3669
3670 // Decode the second character of the format string.
3671 if (FormatStr[1] == 'c') {
3672 if (N <= 1) {
3673 // Use an arbitary string of length 1 to transform the call into
3674 // either a nul store (N == 1) or a no-op (N == 0) and fold it
3675 // to one.
3676 StringRef CharStr("*");
3677 return emitSnPrintfMemCpy(CI, StrArg: nullptr, Str: CharStr, N, B);
3678 }
3679
3680 // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
3681 if (!CI->getArgOperand(i: 3)->getType()->isIntegerTy())
3682 return nullptr;
3683 Value *V = B.CreateTrunc(V: CI->getArgOperand(i: 3), DestTy: B.getInt8Ty(), Name: "char");
3684 Value *Ptr = DstArg;
3685 B.CreateStore(Val: V, Ptr);
3686 Ptr = B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr, IdxList: B.getInt32(C: 1), Name: "nul");
3687 B.CreateStore(Val: B.getInt8(C: 0), Ptr);
3688 return ConstantInt::get(Ty: CI->getType(), V: 1);
3689 }
3690
3691 if (FormatStr[1] != 's')
3692 return nullptr;
3693
3694 Value *StrArg = CI->getArgOperand(i: 3);
3695 // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1)
3696 StringRef Str;
3697 if (!getConstantStringInfo(V: StrArg, Str))
3698 return nullptr;
3699
3700 return emitSnPrintfMemCpy(CI, StrArg, Str, N, B);
3701}
3702
3703Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilderBase &B) {
3704 if (Value *V = optimizeSnPrintFString(CI, B)) {
3705 return V;
3706 }
3707
3708 if (isKnownNonZero(V: CI->getOperand(i_nocapture: 1), Q: DL))
3709 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
3710 return nullptr;
3711}
3712
3713Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
3714 IRBuilderBase &B) {
3715 optimizeErrorReporting(CI, B, StreamArg: 0);
3716
3717 // All the optimizations depend on the format string.
3718 StringRef FormatStr;
3719 if (!getConstantStringInfo(V: CI->getArgOperand(i: 1), Str&: FormatStr))
3720 return nullptr;
3721
3722 // Do not do any of the following transformations if the fprintf return
3723 // value is used, in general the fprintf return value is not compatible
3724 // with fwrite(), fputc() or fputs().
3725 if (!CI->use_empty())
3726 return nullptr;
3727
3728 // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
3729 if (CI->arg_size() == 2) {
3730 // Could handle %% -> % if we cared.
3731 if (FormatStr.contains(C: '%'))
3732 return nullptr; // We found a format specifier.
3733
3734 return copyFlags(
3735 Old: *CI, New: emitFWrite(Ptr: CI->getArgOperand(i: 1),
3736 Size: TLI->getAsSizeT(V: FormatStr.size(), M: *CI->getModule()),
3737 File: CI->getArgOperand(i: 0), B, DL, TLI));
3738 }
3739
3740 // The remaining optimizations require the format string to be "%s" or "%c"
3741 // and have an extra operand.
3742 if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
3743 return nullptr;
3744
3745 // Decode the second character of the format string.
3746 if (FormatStr[1] == 'c') {
3747 // fprintf(F, "%c", chr) --> fputc((int)chr, F)
3748 if (!CI->getArgOperand(i: 2)->getType()->isIntegerTy())
3749 return nullptr;
3750 Type *IntTy = B.getIntNTy(N: TLI->getIntSize());
3751 Value *V = B.CreateIntCast(V: CI->getArgOperand(i: 2), DestTy: IntTy, /*isSigned*/ true,
3752 Name: "chari");
3753 return copyFlags(Old: *CI, New: emitFPutC(Char: V, File: CI->getArgOperand(i: 0), B, TLI));
3754 }
3755
3756 if (FormatStr[1] == 's') {
3757 // fprintf(F, "%s", str) --> fputs(str, F)
3758 if (!CI->getArgOperand(i: 2)->getType()->isPointerTy())
3759 return nullptr;
3760 return copyFlags(
3761 Old: *CI, New: emitFPutS(Str: CI->getArgOperand(i: 2), File: CI->getArgOperand(i: 0), B, TLI));
3762 }
3763 return nullptr;
3764}
3765
3766Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilderBase &B) {
3767 Module *M = CI->getModule();
3768 Function *Callee = CI->getCalledFunction();
3769 FunctionType *FT = Callee->getFunctionType();
3770 if (Value *V = optimizeFPrintFString(CI, B)) {
3771 return V;
3772 }
3773
3774 // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
3775 // floating point arguments.
3776 if (isLibFuncEmittable(M, TLI, TheLibFunc: LibFunc_fiprintf) &&
3777 !callHasFloatingPointArgument(CI)) {
3778 FunctionCallee FIPrintFFn = getOrInsertLibFunc(M, TLI: *TLI, TheLibFunc: LibFunc_fiprintf,
3779 T: FT, AttributeList: Callee->getAttributes());
3780 CallInst *New = cast<CallInst>(Val: CI->clone());
3781 New->setCalledFunction(FIPrintFFn);
3782 B.Insert(I: New);
3783 return New;
3784 }
3785
3786 // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
3787 // 128-bit floating point arguments.
3788 if (isLibFuncEmittable(M, TLI, TheLibFunc: LibFunc_small_fprintf) &&
3789 !callHasFP128Argument(CI)) {
3790 auto SmallFPrintFFn =
3791 getOrInsertLibFunc(M, TLI: *TLI, TheLibFunc: LibFunc_small_fprintf, T: FT,
3792 AttributeList: Callee->getAttributes());
3793 CallInst *New = cast<CallInst>(Val: CI->clone());
3794 New->setCalledFunction(SmallFPrintFFn);
3795 B.Insert(I: New);
3796 return New;
3797 }
3798
3799 return nullptr;
3800}
3801
3802Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilderBase &B) {
3803 optimizeErrorReporting(CI, B, StreamArg: 3);
3804
3805 // Get the element size and count.
3806 ConstantInt *SizeC = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 1));
3807 ConstantInt *CountC = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 2));
3808 if (SizeC && CountC) {
3809 uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue();
3810
3811 // If this is writing zero records, remove the call (it's a noop).
3812 if (Bytes == 0)
3813 return ConstantInt::get(Ty: CI->getType(), V: 0);
3814
3815 // If this is writing one byte, turn it into fputc.
3816 // This optimisation is only valid, if the return value is unused.
3817 if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
3818 Value *Char = B.CreateLoad(Ty: B.getInt8Ty(), Ptr: CI->getArgOperand(i: 0), Name: "char");
3819 Type *IntTy = B.getIntNTy(N: TLI->getIntSize());
3820 Value *Cast = B.CreateIntCast(V: Char, DestTy: IntTy, /*isSigned*/ true, Name: "chari");
3821 Value *NewCI = emitFPutC(Char: Cast, File: CI->getArgOperand(i: 3), B, TLI);
3822 return NewCI ? ConstantInt::get(Ty: CI->getType(), V: 1) : nullptr;
3823 }
3824 }
3825
3826 return nullptr;
3827}
3828
3829Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
3830 optimizeErrorReporting(CI, B, StreamArg: 1);
3831
3832 // Don't rewrite fputs to fwrite when optimising for size because fwrite
3833 // requires more arguments and thus extra MOVs are required.
3834 if (llvm::shouldOptimizeForSize(BB: CI->getParent(), PSI, BFI,
3835 QueryType: PGSOQueryType::IRPass))
3836 return nullptr;
3837
3838 // We can't optimize if return value is used.
3839 if (!CI->use_empty())
3840 return nullptr;
3841
3842 // fputs(s,F) --> fwrite(s,strlen(s),1,F)
3843 uint64_t Len = GetStringLength(V: CI->getArgOperand(i: 0));
3844 if (!Len)
3845 return nullptr;
3846
3847 // Known to have no uses (see above).
3848 unsigned SizeTBits = TLI->getSizeTSize(M: *CI->getModule());
3849 Type *SizeTTy = IntegerType::get(C&: CI->getContext(), NumBits: SizeTBits);
3850 return copyFlags(
3851 Old: *CI,
3852 New: emitFWrite(Ptr: CI->getArgOperand(i: 0),
3853 Size: ConstantInt::get(Ty: SizeTTy, V: Len - 1),
3854 File: CI->getArgOperand(i: 1), B, DL, TLI));
3855}
3856
3857Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilderBase &B) {
3858 annotateNonNullNoUndefBasedOnAccess(CI, ArgNos: 0);
3859 if (!CI->use_empty())
3860 return nullptr;
3861
3862 // Check for a constant string.
3863 // puts("") -> putchar('\n')
3864 StringRef Str;
3865 if (getConstantStringInfo(V: CI->getArgOperand(i: 0), Str) && Str.empty()) {
3866 // putchar takes an argument of the same type as puts returns, i.e.,
3867 // int, which need not be 32 bits wide.
3868 Type *IntTy = CI->getType();
3869 return copyFlags(Old: *CI, New: emitPutChar(Char: ConstantInt::get(Ty: IntTy, V: '\n'), B, TLI));
3870 }
3871
3872 return nullptr;
3873}
3874
3875Value *LibCallSimplifier::optimizeExit(CallInst *CI) {
3876
3877 // Mark 'exit' as cold if its not exit(0) (success).
3878 const APInt *C;
3879 if (!CI->hasFnAttr(Kind: Attribute::Cold) &&
3880 match(V: CI->getArgOperand(i: 0), P: m_APInt(Res&: C)) && !C->isZero()) {
3881 CI->addFnAttr(Kind: Attribute::Cold);
3882 }
3883 return nullptr;
3884}
3885
3886Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilderBase &B) {
3887 // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
3888 return copyFlags(Old: *CI, New: B.CreateMemMove(Dst: CI->getArgOperand(i: 1), DstAlign: Align(1),
3889 Src: CI->getArgOperand(i: 0), SrcAlign: Align(1),
3890 Size: CI->getArgOperand(i: 2)));
3891}
3892
3893bool LibCallSimplifier::hasFloatVersion(const Module *M, StringRef FuncName) {
3894 SmallString<20> FloatFuncName = FuncName;
3895 FloatFuncName += 'f';
3896 return isLibFuncEmittable(M, TLI, Name: FloatFuncName);
3897}
3898
3899Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
3900 IRBuilderBase &Builder) {
3901 Module *M = CI->getModule();
3902 LibFunc Func;
3903 Function *Callee = CI->getCalledFunction();
3904
3905 // Check for string/memory library functions.
3906 if (TLI->getLibFunc(FDecl: *Callee, F&: Func) && isLibFuncEmittable(M, TLI, TheLibFunc: Func)) {
3907 // Make sure we never change the calling convention.
3908 assert(
3909 (ignoreCallingConv(Func) ||
3910 TargetLibraryInfoImpl::isCallingConvCCompatible(CI)) &&
3911 "Optimizing string/memory libcall would change the calling convention");
3912 switch (Func) {
3913 case LibFunc_strcat:
3914 return optimizeStrCat(CI, B&: Builder);
3915 case LibFunc_strncat:
3916 return optimizeStrNCat(CI, B&: Builder);
3917 case LibFunc_strchr:
3918 return optimizeStrChr(CI, B&: Builder);
3919 case LibFunc_strrchr:
3920 return optimizeStrRChr(CI, B&: Builder);
3921 case LibFunc_strcmp:
3922 return optimizeStrCmp(CI, B&: Builder);
3923 case LibFunc_strncmp:
3924 return optimizeStrNCmp(CI, B&: Builder);
3925 case LibFunc_strcpy:
3926 return optimizeStrCpy(CI, B&: Builder);
3927 case LibFunc_stpcpy:
3928 return optimizeStpCpy(CI, B&: Builder);
3929 case LibFunc_strlcpy:
3930 return optimizeStrLCpy(CI, B&: Builder);
3931 case LibFunc_stpncpy:
3932 return optimizeStringNCpy(CI, /*RetEnd=*/true, B&: Builder);
3933 case LibFunc_strncpy:
3934 return optimizeStringNCpy(CI, /*RetEnd=*/false, B&: Builder);
3935 case LibFunc_strlen:
3936 return optimizeStrLen(CI, B&: Builder);
3937 case LibFunc_strnlen:
3938 return optimizeStrNLen(CI, B&: Builder);
3939 case LibFunc_strpbrk:
3940 return optimizeStrPBrk(CI, B&: Builder);
3941 case LibFunc_strndup:
3942 return optimizeStrNDup(CI, B&: Builder);
3943 case LibFunc_strtol:
3944 case LibFunc_strtod:
3945 case LibFunc_strtof:
3946 case LibFunc_strtoul:
3947 case LibFunc_strtoll:
3948 case LibFunc_strtold:
3949 case LibFunc_strtoull:
3950 return optimizeStrTo(CI, B&: Builder);
3951 case LibFunc_strspn:
3952 return optimizeStrSpn(CI, B&: Builder);
3953 case LibFunc_strcspn:
3954 return optimizeStrCSpn(CI, B&: Builder);
3955 case LibFunc_strstr:
3956 return optimizeStrStr(CI, B&: Builder);
3957 case LibFunc_memchr:
3958 return optimizeMemChr(CI, B&: Builder);
3959 case LibFunc_memrchr:
3960 return optimizeMemRChr(CI, B&: Builder);
3961 case LibFunc_bcmp:
3962 return optimizeBCmp(CI, B&: Builder);
3963 case LibFunc_memcmp:
3964 return optimizeMemCmp(CI, B&: Builder);
3965 case LibFunc_memcpy:
3966 return optimizeMemCpy(CI, B&: Builder);
3967 case LibFunc_memccpy:
3968 return optimizeMemCCpy(CI, B&: Builder);
3969 case LibFunc_mempcpy:
3970 return optimizeMemPCpy(CI, B&: Builder);
3971 case LibFunc_memmove:
3972 return optimizeMemMove(CI, B&: Builder);
3973 case LibFunc_memset:
3974 return optimizeMemSet(CI, B&: Builder);
3975 case LibFunc_realloc:
3976 return optimizeRealloc(CI, B&: Builder);
3977 case LibFunc_wcslen:
3978 return optimizeWcslen(CI, B&: Builder);
3979 case LibFunc_bcopy:
3980 return optimizeBCopy(CI, B&: Builder);
3981 case LibFunc_Znwm:
3982 case LibFunc_ZnwmRKSt9nothrow_t:
3983 case LibFunc_ZnwmSt11align_val_t:
3984 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
3985 case LibFunc_Znam:
3986 case LibFunc_ZnamRKSt9nothrow_t:
3987 case LibFunc_ZnamSt11align_val_t:
3988 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
3989 case LibFunc_Znwm12__hot_cold_t:
3990 case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
3991 case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
3992 case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
3993 case LibFunc_Znam12__hot_cold_t:
3994 case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
3995 case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
3996 case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
3997 case LibFunc_size_returning_new:
3998 case LibFunc_size_returning_new_hot_cold:
3999 case LibFunc_size_returning_new_aligned:
4000 case LibFunc_size_returning_new_aligned_hot_cold:
4001 return optimizeNew(CI, B&: Builder, Func);
4002 default:
4003 break;
4004 }
4005 }
4006 return nullptr;
4007}
4008
4009/// Constant folding nan/nanf/nanl.
4010static Value *optimizeNaN(CallInst *CI) {
4011 StringRef CharSeq;
4012 if (!getConstantStringInfo(V: CI->getArgOperand(i: 0), Str&: CharSeq))
4013 return nullptr;
4014
4015 APInt Fill;
4016 // Treat empty strings as if they were zero.
4017 if (CharSeq.empty())
4018 Fill = APInt(32, 0);
4019 else if (CharSeq.getAsInteger(Radix: 0, Result&: Fill))
4020 return nullptr;
4021
4022 return ConstantFP::getQNaN(Ty: CI->getType(), /*Negative=*/false, Payload: &Fill);
4023}
4024
4025Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
4026 LibFunc Func,
4027 IRBuilderBase &Builder) {
4028 const Module *M = CI->getModule();
4029
4030 // Don't optimize calls that require strict floating point semantics.
4031 if (CI->isStrictFP())
4032 return nullptr;
4033
4034 if (Value *V = optimizeSymmetric(CI, Func, B&: Builder))
4035 return V;
4036
4037 switch (Func) {
4038 case LibFunc_sinpif:
4039 case LibFunc_sinpi:
4040 return optimizeSinCosPi(CI, /*IsSin*/true, B&: Builder);
4041 case LibFunc_cospif:
4042 case LibFunc_cospi:
4043 return optimizeSinCosPi(CI, /*IsSin*/false, B&: Builder);
4044 case LibFunc_powf:
4045 case LibFunc_pow:
4046 case LibFunc_powl:
4047 return optimizePow(Pow: CI, B&: Builder);
4048 case LibFunc_exp2l:
4049 case LibFunc_exp2:
4050 case LibFunc_exp2f:
4051 return optimizeExp2(CI, B&: Builder);
4052 case LibFunc_fabsf:
4053 case LibFunc_fabs:
4054 case LibFunc_fabsl:
4055 return replaceUnaryCall(CI, B&: Builder, IID: Intrinsic::fabs);
4056 case LibFunc_sqrtf:
4057 case LibFunc_sqrt:
4058 case LibFunc_sqrtl:
4059 return optimizeSqrt(CI, B&: Builder);
4060 case LibFunc_fmod:
4061 case LibFunc_fmodf:
4062 case LibFunc_fmodl:
4063 return optimizeFMod(CI, B&: Builder);
4064 case LibFunc_logf:
4065 case LibFunc_log:
4066 case LibFunc_logl:
4067 case LibFunc_log10f:
4068 case LibFunc_log10:
4069 case LibFunc_log10l:
4070 case LibFunc_log1pf:
4071 case LibFunc_log1p:
4072 case LibFunc_log1pl:
4073 case LibFunc_log2f:
4074 case LibFunc_log2:
4075 case LibFunc_log2l:
4076 case LibFunc_logbf:
4077 case LibFunc_logb:
4078 case LibFunc_logbl:
4079 return optimizeLog(Log: CI, B&: Builder);
4080 case LibFunc_tan:
4081 case LibFunc_tanf:
4082 case LibFunc_tanl:
4083 case LibFunc_sinh:
4084 case LibFunc_sinhf:
4085 case LibFunc_sinhl:
4086 case LibFunc_asinh:
4087 case LibFunc_asinhf:
4088 case LibFunc_asinhl:
4089 case LibFunc_cosh:
4090 case LibFunc_coshf:
4091 case LibFunc_coshl:
4092 case LibFunc_atanh:
4093 case LibFunc_atanhf:
4094 case LibFunc_atanhl:
4095 return optimizeTrigInversionPairs(CI, B&: Builder);
4096 case LibFunc_ceil:
4097 return replaceUnaryCall(CI, B&: Builder, IID: Intrinsic::ceil);
4098 case LibFunc_floor:
4099 return replaceUnaryCall(CI, B&: Builder, IID: Intrinsic::floor);
4100 case LibFunc_round:
4101 return replaceUnaryCall(CI, B&: Builder, IID: Intrinsic::round);
4102 case LibFunc_roundeven:
4103 return replaceUnaryCall(CI, B&: Builder, IID: Intrinsic::roundeven);
4104 case LibFunc_nearbyint:
4105 return replaceUnaryCall(CI, B&: Builder, IID: Intrinsic::nearbyint);
4106 case LibFunc_rint:
4107 return replaceUnaryCall(CI, B&: Builder, IID: Intrinsic::rint);
4108 case LibFunc_trunc:
4109 return replaceUnaryCall(CI, B&: Builder, IID: Intrinsic::trunc);
4110 case LibFunc_acos:
4111 case LibFunc_acosh:
4112 case LibFunc_asin:
4113 case LibFunc_atan:
4114 case LibFunc_cbrt:
4115 case LibFunc_exp:
4116 case LibFunc_exp10:
4117 case LibFunc_expm1:
4118 case LibFunc_cos:
4119 case LibFunc_sin:
4120 case LibFunc_tanh:
4121 if (UnsafeFPShrink && hasFloatVersion(M, FuncName: CI->getCalledFunction()->getName()))
4122 return optimizeUnaryDoubleFP(CI, B&: Builder, TLI, isPrecise: true);
4123 return nullptr;
4124 case LibFunc_copysign:
4125 if (hasFloatVersion(M, FuncName: CI->getCalledFunction()->getName()))
4126 return optimizeBinaryDoubleFP(CI, B&: Builder, TLI);
4127 return nullptr;
4128 case LibFunc_fdim:
4129 case LibFunc_fdimf:
4130 case LibFunc_fdiml:
4131 return optimizeFdim(CI, B&: Builder);
4132 case LibFunc_fminf:
4133 case LibFunc_fmin:
4134 case LibFunc_fminl:
4135 return optimizeFMinFMax(CI, B&: Builder, IID: Intrinsic::minnum);
4136 case LibFunc_fmaxf:
4137 case LibFunc_fmax:
4138 case LibFunc_fmaxl:
4139 return optimizeFMinFMax(CI, B&: Builder, IID: Intrinsic::maxnum);
4140 case LibFunc_fminimum_numf:
4141 case LibFunc_fminimum_num:
4142 case LibFunc_fminimum_numl:
4143 return replaceBinaryCall(CI, B&: Builder, IID: Intrinsic::minimumnum);
4144 case LibFunc_fmaximum_numf:
4145 case LibFunc_fmaximum_num:
4146 case LibFunc_fmaximum_numl:
4147 return replaceBinaryCall(CI, B&: Builder, IID: Intrinsic::maximumnum);
4148 case LibFunc_cabs:
4149 case LibFunc_cabsf:
4150 case LibFunc_cabsl:
4151 return optimizeCAbs(CI, B&: Builder);
4152 case LibFunc_remquo:
4153 case LibFunc_remquof:
4154 case LibFunc_remquol:
4155 return optimizeRemquo(CI, B&: Builder);
4156 case LibFunc_nan:
4157 case LibFunc_nanf:
4158 case LibFunc_nanl:
4159 return optimizeNaN(CI);
4160 default:
4161 return nullptr;
4162 }
4163}
4164
4165Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
4166 Module *M = CI->getModule();
4167 assert(!CI->isMustTailCall() && "These transforms aren't musttail safe.");
4168
4169 // TODO: Split out the code below that operates on FP calls so that
4170 // we can all non-FP calls with the StrictFP attribute to be
4171 // optimized.
4172 if (CI->isNoBuiltin()) {
4173 // Optionally update operator new calls.
4174 return maybeOptimizeNoBuiltinOperatorNew(CI, B&: Builder);
4175 }
4176
4177 LibFunc Func;
4178 Function *Callee = CI->getCalledFunction();
4179 bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
4180
4181 SmallVector<OperandBundleDef, 2> OpBundles;
4182 CI->getOperandBundlesAsDefs(Defs&: OpBundles);
4183
4184 IRBuilderBase::OperandBundlesGuard Guard(Builder);
4185 Builder.setDefaultOperandBundles(OpBundles);
4186
4187 // Command-line parameter overrides instruction attribute.
4188 // This can't be moved to optimizeFloatingPointLibCall() because it may be
4189 // used by the intrinsic optimizations.
4190 if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
4191 UnsafeFPShrink = EnableUnsafeFPShrink;
4192 else if (isa<FPMathOperator>(Val: CI) && CI->isFast())
4193 UnsafeFPShrink = true;
4194
4195 // First, check for intrinsics.
4196 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: CI)) {
4197 if (!IsCallingConvC)
4198 return nullptr;
4199 // The FP intrinsics have corresponding constrained versions so we don't
4200 // need to check for the StrictFP attribute here.
4201 switch (II->getIntrinsicID()) {
4202 case Intrinsic::pow:
4203 return optimizePow(Pow: CI, B&: Builder);
4204 case Intrinsic::exp2:
4205 return optimizeExp2(CI, B&: Builder);
4206 case Intrinsic::log:
4207 case Intrinsic::log2:
4208 case Intrinsic::log10:
4209 return optimizeLog(Log: CI, B&: Builder);
4210 case Intrinsic::sqrt:
4211 return optimizeSqrt(CI, B&: Builder);
4212 case Intrinsic::memset:
4213 return optimizeMemSet(CI, B&: Builder);
4214 case Intrinsic::memcpy:
4215 return optimizeMemCpy(CI, B&: Builder);
4216 case Intrinsic::memmove:
4217 return optimizeMemMove(CI, B&: Builder);
4218 case Intrinsic::sin:
4219 case Intrinsic::cos:
4220 if (UnsafeFPShrink)
4221 return optimizeUnaryDoubleFP(CI, B&: Builder, TLI, /*isPrecise=*/true);
4222 return nullptr;
4223 default:
4224 return nullptr;
4225 }
4226 }
4227
4228 // Also try to simplify calls to fortified library functions.
4229 if (Value *SimplifiedFortifiedCI =
4230 FortifiedSimplifier.optimizeCall(CI, B&: Builder))
4231 return SimplifiedFortifiedCI;
4232
4233 // Then check for known library functions.
4234 if (TLI->getLibFunc(FDecl: *Callee, F&: Func) && isLibFuncEmittable(M, TLI, TheLibFunc: Func)) {
4235 // We never change the calling convention.
4236 if (!ignoreCallingConv(Func) && !IsCallingConvC)
4237 return nullptr;
4238 if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
4239 return V;
4240 if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder))
4241 return V;
4242 switch (Func) {
4243 case LibFunc_ffs:
4244 case LibFunc_ffsl:
4245 case LibFunc_ffsll:
4246 return optimizeFFS(CI, B&: Builder);
4247 case LibFunc_fls:
4248 case LibFunc_flsl:
4249 case LibFunc_flsll:
4250 return optimizeFls(CI, B&: Builder);
4251 case LibFunc_abs:
4252 case LibFunc_labs:
4253 case LibFunc_llabs:
4254 return optimizeAbs(CI, B&: Builder);
4255 case LibFunc_isdigit:
4256 return optimizeIsDigit(CI, B&: Builder);
4257 case LibFunc_isascii:
4258 return optimizeIsAscii(CI, B&: Builder);
4259 case LibFunc_toascii:
4260 return optimizeToAscii(CI, B&: Builder);
4261 case LibFunc_atoi:
4262 case LibFunc_atol:
4263 case LibFunc_atoll:
4264 return optimizeAtoi(CI, B&: Builder);
4265 case LibFunc_strtol:
4266 case LibFunc_strtoll:
4267 return optimizeStrToInt(CI, B&: Builder, /*AsSigned=*/true);
4268 case LibFunc_strtoul:
4269 case LibFunc_strtoull:
4270 return optimizeStrToInt(CI, B&: Builder, /*AsSigned=*/false);
4271 case LibFunc_printf:
4272 return optimizePrintF(CI, B&: Builder);
4273 case LibFunc_sprintf:
4274 return optimizeSPrintF(CI, B&: Builder);
4275 case LibFunc_snprintf:
4276 return optimizeSnPrintF(CI, B&: Builder);
4277 case LibFunc_fprintf:
4278 return optimizeFPrintF(CI, B&: Builder);
4279 case LibFunc_fwrite:
4280 return optimizeFWrite(CI, B&: Builder);
4281 case LibFunc_fputs:
4282 return optimizeFPuts(CI, B&: Builder);
4283 case LibFunc_puts:
4284 return optimizePuts(CI, B&: Builder);
4285 case LibFunc_perror:
4286 return optimizeErrorReporting(CI, B&: Builder);
4287 case LibFunc_vfprintf:
4288 case LibFunc_fiprintf:
4289 return optimizeErrorReporting(CI, B&: Builder, StreamArg: 0);
4290 case LibFunc_exit:
4291 case LibFunc_Exit:
4292 return optimizeExit(CI);
4293 default:
4294 return nullptr;
4295 }
4296 }
4297 return nullptr;
4298}
4299
4300LibCallSimplifier::LibCallSimplifier(
4301 const DataLayout &DL, const TargetLibraryInfo *TLI, DominatorTree *DT,
4302 DomConditionCache *DC, AssumptionCache *AC, OptimizationRemarkEmitter &ORE,
4303 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
4304 function_ref<void(Instruction *, Value *)> Replacer,
4305 function_ref<void(Instruction *)> Eraser)
4306 : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), DT(DT), DC(DC), AC(AC),
4307 ORE(ORE), BFI(BFI), PSI(PSI), Replacer(Replacer), Eraser(Eraser) {}
4308
4309void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
4310 // Indirect through the replacer used in this instance.
4311 Replacer(I, With);
4312}
4313
4314void LibCallSimplifier::eraseFromParent(Instruction *I) {
4315 Eraser(I);
4316}
4317
4318// TODO:
4319// Additional cases that we need to add to this file:
4320//
4321// cbrt:
4322// * cbrt(expN(X)) -> expN(x/3)
4323// * cbrt(sqrt(x)) -> pow(x,1/6)
4324// * cbrt(cbrt(x)) -> pow(x,1/9)
4325//
4326// exp, expf, expl:
4327// * exp(log(x)) -> x
4328//
4329// log, logf, logl:
4330// * log(exp(x)) -> x
4331// * log(exp(y)) -> y*log(e)
4332// * log(exp10(y)) -> y*log(10)
4333// * log(sqrt(x)) -> 0.5*log(x)
4334//
4335// pow, powf, powl:
4336// * pow(sqrt(x),y) -> pow(x,y*0.5)
4337// * pow(pow(x,y),z)-> pow(x,y*z)
4338//
4339// signbit:
4340// * signbit(cnst) -> cnst'
4341// * signbit(nncst) -> 0 (if pstv is a non-negative constant)
4342//
4343// sqrt, sqrtf, sqrtl:
4344// * sqrt(expN(x)) -> expN(x*0.5)
4345// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
4346// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
4347//
4348
4349//===----------------------------------------------------------------------===//
4350// Fortified Library Call Optimizations
4351//===----------------------------------------------------------------------===//
4352
4353bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(
4354 CallInst *CI, unsigned ObjSizeOp, std::optional<unsigned> SizeOp,
4355 std::optional<unsigned> StrOp, std::optional<unsigned> FlagOp) {
4356 // If this function takes a flag argument, the implementation may use it to
4357 // perform extra checks. Don't fold into the non-checking variant.
4358 if (FlagOp) {
4359 ConstantInt *Flag = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: *FlagOp));
4360 if (!Flag || !Flag->isZero())
4361 return false;
4362 }
4363
4364 if (SizeOp && CI->getArgOperand(i: ObjSizeOp) == CI->getArgOperand(i: *SizeOp))
4365 return true;
4366
4367 if (ConstantInt *ObjSizeCI =
4368 dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: ObjSizeOp))) {
4369 if (ObjSizeCI->isMinusOne())
4370 return true;
4371 // If the object size wasn't -1 (unknown), bail out if we were asked to.
4372 if (OnlyLowerUnknownSize)
4373 return false;
4374 if (StrOp) {
4375 uint64_t Len = GetStringLength(V: CI->getArgOperand(i: *StrOp));
4376 // If the length is 0 we don't know how long it is and so we can't
4377 // remove the check.
4378 if (Len)
4379 annotateDereferenceableBytes(CI, ArgNos: *StrOp, DereferenceableBytes: Len);
4380 else
4381 return false;
4382 return ObjSizeCI->getZExtValue() >= Len;
4383 }
4384
4385 if (SizeOp) {
4386 if (ConstantInt *SizeCI =
4387 dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: *SizeOp)))
4388 return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
4389 }
4390 }
4391 return false;
4392}
4393
4394Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
4395 IRBuilderBase &B) {
4396 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3, SizeOp: 2)) {
4397 CallInst *NewCI =
4398 B.CreateMemCpy(Dst: CI->getArgOperand(i: 0), DstAlign: Align(1), Src: CI->getArgOperand(i: 1),
4399 SrcAlign: Align(1), Size: CI->getArgOperand(i: 2));
4400 mergeAttributesAndFlags(NewCI, Old: *CI);
4401 return CI->getArgOperand(i: 0);
4402 }
4403 return nullptr;
4404}
4405
4406Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
4407 IRBuilderBase &B) {
4408 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3, SizeOp: 2)) {
4409 CallInst *NewCI =
4410 B.CreateMemMove(Dst: CI->getArgOperand(i: 0), DstAlign: Align(1), Src: CI->getArgOperand(i: 1),
4411 SrcAlign: Align(1), Size: CI->getArgOperand(i: 2));
4412 mergeAttributesAndFlags(NewCI, Old: *CI);
4413 return CI->getArgOperand(i: 0);
4414 }
4415 return nullptr;
4416}
4417
4418Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
4419 IRBuilderBase &B) {
4420 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3, SizeOp: 2)) {
4421 Value *Val = B.CreateIntCast(V: CI->getArgOperand(i: 1), DestTy: B.getInt8Ty(), isSigned: false);
4422 CallInst *NewCI = B.CreateMemSet(Ptr: CI->getArgOperand(i: 0), Val,
4423 Size: CI->getArgOperand(i: 2), Align: Align(1));
4424 mergeAttributesAndFlags(NewCI, Old: *CI);
4425 return CI->getArgOperand(i: 0);
4426 }
4427 return nullptr;
4428}
4429
4430Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
4431 IRBuilderBase &B) {
4432 const DataLayout &DL = CI->getDataLayout();
4433 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3, SizeOp: 2))
4434 if (Value *Call = emitMemPCpy(Dst: CI->getArgOperand(i: 0), Src: CI->getArgOperand(i: 1),
4435 Len: CI->getArgOperand(i: 2), B, DL, TLI)) {
4436 return mergeAttributesAndFlags(NewCI: cast<CallInst>(Val: Call), Old: *CI);
4437 }
4438 return nullptr;
4439}
4440
4441Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
4442 IRBuilderBase &B,
4443 LibFunc Func) {
4444 const DataLayout &DL = CI->getDataLayout();
4445 Value *Dst = CI->getArgOperand(i: 0), *Src = CI->getArgOperand(i: 1),
4446 *ObjSize = CI->getArgOperand(i: 2);
4447
4448 // __stpcpy_chk(x,x,...) -> x+strlen(x)
4449 if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
4450 Value *StrLen = emitStrLen(Ptr: Src, B, DL, TLI);
4451 return StrLen ? B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: Dst, IdxList: StrLen) : nullptr;
4452 }
4453
4454 // If a) we don't have any length information, or b) we know this will
4455 // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
4456 // st[rp]cpy_chk call which may fail at runtime if the size is too long.
4457 // TODO: It might be nice to get a maximum length out of the possible
4458 // string lengths for varying.
4459 if (isFortifiedCallFoldable(CI, ObjSizeOp: 2, SizeOp: std::nullopt, StrOp: 1)) {
4460 if (Func == LibFunc_strcpy_chk)
4461 return copyFlags(Old: *CI, New: emitStrCpy(Dst, Src, B, TLI));
4462 else
4463 return copyFlags(Old: *CI, New: emitStpCpy(Dst, Src, B, TLI));
4464 }
4465
4466 if (OnlyLowerUnknownSize)
4467 return nullptr;
4468
4469 // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
4470 uint64_t Len = GetStringLength(V: Src);
4471 if (Len)
4472 annotateDereferenceableBytes(CI, ArgNos: 1, DereferenceableBytes: Len);
4473 else
4474 return nullptr;
4475
4476 unsigned SizeTBits = TLI->getSizeTSize(M: *CI->getModule());
4477 Type *SizeTTy = IntegerType::get(C&: CI->getContext(), NumBits: SizeTBits);
4478 Value *LenV = ConstantInt::get(Ty: SizeTTy, V: Len);
4479 Value *Ret = emitMemCpyChk(Dst, Src, Len: LenV, ObjSize, B, DL, TLI);
4480 // If the function was an __stpcpy_chk, and we were able to fold it into
4481 // a __memcpy_chk, we still need to return the correct end pointer.
4482 if (Ret && Func == LibFunc_stpcpy_chk)
4483 return B.CreateInBoundsGEP(Ty: B.getInt8Ty(), Ptr: Dst,
4484 IdxList: ConstantInt::get(Ty: SizeTTy, V: Len - 1));
4485 return copyFlags(Old: *CI, New: cast<CallInst>(Val: Ret));
4486}
4487
4488Value *FortifiedLibCallSimplifier::optimizeStrLenChk(CallInst *CI,
4489 IRBuilderBase &B) {
4490 if (isFortifiedCallFoldable(CI, ObjSizeOp: 1, SizeOp: std::nullopt, StrOp: 0))
4491 return copyFlags(Old: *CI, New: emitStrLen(Ptr: CI->getArgOperand(i: 0), B,
4492 DL: CI->getDataLayout(), TLI));
4493 return nullptr;
4494}
4495
4496Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
4497 IRBuilderBase &B,
4498 LibFunc Func) {
4499 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3, SizeOp: 2)) {
4500 if (Func == LibFunc_strncpy_chk)
4501 return copyFlags(Old: *CI,
4502 New: emitStrNCpy(Dst: CI->getArgOperand(i: 0), Src: CI->getArgOperand(i: 1),
4503 Len: CI->getArgOperand(i: 2), B, TLI));
4504 else
4505 return copyFlags(Old: *CI,
4506 New: emitStpNCpy(Dst: CI->getArgOperand(i: 0), Src: CI->getArgOperand(i: 1),
4507 Len: CI->getArgOperand(i: 2), B, TLI));
4508 }
4509
4510 return nullptr;
4511}
4512
4513Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
4514 IRBuilderBase &B) {
4515 if (isFortifiedCallFoldable(CI, ObjSizeOp: 4, SizeOp: 3))
4516 return copyFlags(
4517 Old: *CI, New: emitMemCCpy(Ptr1: CI->getArgOperand(i: 0), Ptr2: CI->getArgOperand(i: 1),
4518 Val: CI->getArgOperand(i: 2), Len: CI->getArgOperand(i: 3), B, TLI));
4519
4520 return nullptr;
4521}
4522
4523Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
4524 IRBuilderBase &B) {
4525 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3, SizeOp: 1, StrOp: std::nullopt, FlagOp: 2)) {
4526 SmallVector<Value *, 8> VariadicArgs(drop_begin(RangeOrContainer: CI->args(), N: 5));
4527 return copyFlags(Old: *CI,
4528 New: emitSNPrintf(Dest: CI->getArgOperand(i: 0), Size: CI->getArgOperand(i: 1),
4529 Fmt: CI->getArgOperand(i: 4), Args: VariadicArgs, B, TLI));
4530 }
4531
4532 return nullptr;
4533}
4534
4535Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
4536 IRBuilderBase &B) {
4537 if (isFortifiedCallFoldable(CI, ObjSizeOp: 2, SizeOp: std::nullopt, StrOp: std::nullopt, FlagOp: 1)) {
4538 SmallVector<Value *, 8> VariadicArgs(drop_begin(RangeOrContainer: CI->args(), N: 4));
4539 return copyFlags(Old: *CI,
4540 New: emitSPrintf(Dest: CI->getArgOperand(i: 0), Fmt: CI->getArgOperand(i: 3),
4541 VariadicArgs, B, TLI));
4542 }
4543
4544 return nullptr;
4545}
4546
4547Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
4548 IRBuilderBase &B) {
4549 if (isFortifiedCallFoldable(CI, ObjSizeOp: 2))
4550 return copyFlags(
4551 Old: *CI, New: emitStrCat(Dest: CI->getArgOperand(i: 0), Src: CI->getArgOperand(i: 1), B, TLI));
4552
4553 return nullptr;
4554}
4555
4556Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
4557 IRBuilderBase &B) {
4558 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3))
4559 return copyFlags(Old: *CI,
4560 New: emitStrLCat(Dest: CI->getArgOperand(i: 0), Src: CI->getArgOperand(i: 1),
4561 Size: CI->getArgOperand(i: 2), B, TLI));
4562
4563 return nullptr;
4564}
4565
4566Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
4567 IRBuilderBase &B) {
4568 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3))
4569 return copyFlags(Old: *CI,
4570 New: emitStrNCat(Dest: CI->getArgOperand(i: 0), Src: CI->getArgOperand(i: 1),
4571 Size: CI->getArgOperand(i: 2), B, TLI));
4572
4573 return nullptr;
4574}
4575
4576Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
4577 IRBuilderBase &B) {
4578 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3))
4579 return copyFlags(Old: *CI,
4580 New: emitStrLCpy(Dest: CI->getArgOperand(i: 0), Src: CI->getArgOperand(i: 1),
4581 Size: CI->getArgOperand(i: 2), B, TLI));
4582
4583 return nullptr;
4584}
4585
4586Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
4587 IRBuilderBase &B) {
4588 if (isFortifiedCallFoldable(CI, ObjSizeOp: 3, SizeOp: 1, StrOp: std::nullopt, FlagOp: 2))
4589 return copyFlags(
4590 Old: *CI, New: emitVSNPrintf(Dest: CI->getArgOperand(i: 0), Size: CI->getArgOperand(i: 1),
4591 Fmt: CI->getArgOperand(i: 4), VAList: CI->getArgOperand(i: 5), B, TLI));
4592
4593 return nullptr;
4594}
4595
4596Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
4597 IRBuilderBase &B) {
4598 if (isFortifiedCallFoldable(CI, ObjSizeOp: 2, SizeOp: std::nullopt, StrOp: std::nullopt, FlagOp: 1))
4599 return copyFlags(Old: *CI,
4600 New: emitVSPrintf(Dest: CI->getArgOperand(i: 0), Fmt: CI->getArgOperand(i: 3),
4601 VAList: CI->getArgOperand(i: 4), B, TLI));
4602
4603 return nullptr;
4604}
4605
4606Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI,
4607 IRBuilderBase &Builder) {
4608 // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
4609 // Some clang users checked for _chk libcall availability using:
4610 // __has_builtin(__builtin___memcpy_chk)
4611 // When compiling with -fno-builtin, this is always true.
4612 // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
4613 // end up with fortified libcalls, which isn't acceptable in a freestanding
4614 // environment which only provides their non-fortified counterparts.
4615 //
4616 // Until we change clang and/or teach external users to check for availability
4617 // differently, disregard the "nobuiltin" attribute and TLI::has.
4618 //
4619 // PR23093.
4620
4621 LibFunc Func;
4622 Function *Callee = CI->getCalledFunction();
4623 bool IsCallingConvC = TargetLibraryInfoImpl::isCallingConvCCompatible(CI);
4624
4625 SmallVector<OperandBundleDef, 2> OpBundles;
4626 CI->getOperandBundlesAsDefs(Defs&: OpBundles);
4627
4628 IRBuilderBase::OperandBundlesGuard Guard(Builder);
4629 Builder.setDefaultOperandBundles(OpBundles);
4630
4631 // First, check that this is a known library functions and that the prototype
4632 // is correct.
4633 if (!TLI->getLibFunc(FDecl: *Callee, F&: Func))
4634 return nullptr;
4635
4636 // We never change the calling convention.
4637 if (!ignoreCallingConv(Func) && !IsCallingConvC)
4638 return nullptr;
4639
4640 switch (Func) {
4641 case LibFunc_memcpy_chk:
4642 return optimizeMemCpyChk(CI, B&: Builder);
4643 case LibFunc_mempcpy_chk:
4644 return optimizeMemPCpyChk(CI, B&: Builder);
4645 case LibFunc_memmove_chk:
4646 return optimizeMemMoveChk(CI, B&: Builder);
4647 case LibFunc_memset_chk:
4648 return optimizeMemSetChk(CI, B&: Builder);
4649 case LibFunc_stpcpy_chk:
4650 case LibFunc_strcpy_chk:
4651 return optimizeStrpCpyChk(CI, B&: Builder, Func);
4652 case LibFunc_strlen_chk:
4653 return optimizeStrLenChk(CI, B&: Builder);
4654 case LibFunc_stpncpy_chk:
4655 case LibFunc_strncpy_chk:
4656 return optimizeStrpNCpyChk(CI, B&: Builder, Func);
4657 case LibFunc_memccpy_chk:
4658 return optimizeMemCCpyChk(CI, B&: Builder);
4659 case LibFunc_snprintf_chk:
4660 return optimizeSNPrintfChk(CI, B&: Builder);
4661 case LibFunc_sprintf_chk:
4662 return optimizeSPrintfChk(CI, B&: Builder);
4663 case LibFunc_strcat_chk:
4664 return optimizeStrCatChk(CI, B&: Builder);
4665 case LibFunc_strlcat_chk:
4666 return optimizeStrLCat(CI, B&: Builder);
4667 case LibFunc_strncat_chk:
4668 return optimizeStrNCatChk(CI, B&: Builder);
4669 case LibFunc_strlcpy_chk:
4670 return optimizeStrLCpyChk(CI, B&: Builder);
4671 case LibFunc_vsnprintf_chk:
4672 return optimizeVSNPrintfChk(CI, B&: Builder);
4673 case LibFunc_vsprintf_chk:
4674 return optimizeVSPrintfChk(CI, B&: Builder);
4675 default:
4676 break;
4677 }
4678 return nullptr;
4679}
4680
4681FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
4682 const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
4683 : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
4684