1//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass tries to partially inline the fast path of well-known library
10// functions, such as using square-root instructions for cases where sqrt()
11// does not need to set errno.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
16#include "llvm/Analysis/DomTreeUpdater.h"
17#include "llvm/Analysis/OptimizationRemarkEmitter.h"
18#include "llvm/Analysis/TargetLibraryInfo.h"
19#include "llvm/Analysis/TargetTransformInfo.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/MDBuilder.h"
23#include "llvm/InitializePasses.h"
24#include "llvm/Support/DebugCounter.h"
25#include "llvm/Transforms/Scalar.h"
26#include "llvm/Transforms/Utils/BasicBlockUtils.h"
27#include <optional>
28
29using namespace llvm;
30
namespace llvm {
// Declared here only; the definition is not in this file. optimizeSQRT reads
// this flag and, when it is set, does not attach branch-weight metadata to
// the branch that guards the library call.
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
} // namespace llvm

// NOTE(review): not referenced by name in the code visible here; presumably
// consumed by LLVM debug/statistics macros that expand DEBUG_TYPE - confirm.
#define DEBUG_TYPE "partially-inline-libcalls"

// Debug counter queried by optimizeSQRT through DebugCounter::shouldExecute
// before it changes any IR; when the query returns false the call is left
// untouched.
DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
              "Controls transformations in partially-inline-libcalls");
39
40static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
41 BasicBlock &CurrBB, Function::iterator &BB,
42 const TargetTransformInfo *TTI, DomTreeUpdater *DTU,
43 OptimizationRemarkEmitter *ORE) {
44 // There is no need to change the IR, since backend will emit sqrt
45 // instruction if the call has already been marked read-only.
46 if (Call->onlyReadsMemory())
47 return false;
48
49 if (!DebugCounter::shouldExecute(Counter&: PILCounter))
50 return false;
51
52 // Do the following transformation:
53 //
54 // (before)
55 // dst = sqrt(src)
56 //
57 // (after)
58 // v0 = sqrt_noreadmem(src) # native sqrt instruction.
59 // [if (v0 is a NaN) || if (src < 0)]
60 // v1 = sqrt(src) # library call.
61 // dst = phi(v0, v1)
62 //
63
64 Type *Ty = Call->getType();
65 IRBuilder<> Builder(Call->getNextNode());
66
67 // Split CurrBB right after the call, create a 'then' block (that branches
68 // back to split-off tail of CurrBB) into which we'll insert a libcall.
69 Instruction *LibCallTerm = SplitBlockAndInsertIfThen(
70 Cond: Builder.getTrue(), SplitBefore: Call->getNextNode(), /*Unreachable=*/false,
71 /*BranchWeights*/ nullptr, DTU);
72
73 auto *CurrBBTerm = cast<BranchInst>(Val: CurrBB.getTerminator());
74 // We want an 'else' block though, not a 'then' block.
75 cast<BranchInst>(Val: CurrBBTerm)->swapSuccessors();
76
77 // Create phi that will merge results of either sqrt and replace all uses.
78 BasicBlock *JoinBB = LibCallTerm->getSuccessor(Idx: 0);
79 JoinBB->setName(CurrBB.getName() + ".split");
80 Builder.SetInsertPoint(TheBB: JoinBB, IP: JoinBB->begin());
81 PHINode *Phi = Builder.CreatePHI(Ty, NumReservedValues: 2);
82 Call->replaceAllUsesWith(V: Phi);
83
84 // Finally, insert the libcall into 'else' block.
85 BasicBlock *LibCallBB = LibCallTerm->getParent();
86 LibCallBB->setName("call.sqrt");
87 Builder.SetInsertPoint(LibCallTerm);
88 Instruction *LibCall = Call->clone();
89 Builder.Insert(I: LibCall);
90
91 // Add memory(none) attribute, so that the backend can use a native sqrt
92 // instruction for this call.
93 Call->setDoesNotAccessMemory();
94
95 // Insert a FP compare instruction and use it as the CurrBB branch condition.
96 Builder.SetInsertPoint(CurrBBTerm);
97 Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty)
98 ? Builder.CreateFCmpORD(LHS: Call, RHS: Call)
99 : Builder.CreateFCmpOGE(LHS: Call->getOperand(i_nocapture: 0),
100 RHS: ConstantFP::get(Ty, V: 0.0));
101 CurrBBTerm->setCondition(FCmp);
102 if (!ProfcheckDisableMetadataFixes &&
103 CurrBBTerm->getFunction()->getEntryCount()) {
104 // Presume the quick path - where we don't call the library call - is the
105 // frequent one
106 MDBuilder MDB(CurrBBTerm->getContext());
107 CurrBBTerm->setMetadata(KindID: LLVMContext::MD_prof,
108 Node: MDB.createLikelyBranchWeights());
109 }
110 // Add phi operands.
111 Phi->addIncoming(V: Call, BB: &CurrBB);
112 Phi->addIncoming(V: LibCall, BB: LibCallBB);
113
114 BB = JoinBB->getIterator();
115 return true;
116}
117
118static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
119 const TargetTransformInfo *TTI,
120 DominatorTree *DT,
121 OptimizationRemarkEmitter *ORE) {
122 std::optional<DomTreeUpdater> DTU;
123 if (DT)
124 DTU.emplace(args&: DT, args: DomTreeUpdater::UpdateStrategy::Lazy);
125
126 bool Changed = false;
127
128 Function::iterator CurrBB;
129 for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
130 CurrBB = BB++;
131
132 for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
133 II != IE; ++II) {
134 CallInst *Call = dyn_cast<CallInst>(Val: &*II);
135 Function *CalledFunc;
136
137 if (!Call || !(CalledFunc = Call->getCalledFunction()))
138 continue;
139
140 if (Call->isNoBuiltin() || Call->isStrictFP())
141 continue;
142
143 if (Call->isMustTailCall())
144 continue;
145
146 // Skip if function either has local linkage or is not a known library
147 // function.
148 LibFunc LF;
149 if (CalledFunc->hasLocalLinkage() ||
150 !TLI->getLibFunc(FDecl: *CalledFunc, F&: LF) || !TLI->has(F: LF))
151 continue;
152
153 switch (LF) {
154 case LibFunc_sqrtf:
155 case LibFunc_sqrt:
156 if (TTI->haveFastSqrt(Ty: Call->getType()) &&
157 optimizeSQRT(Call, CalledFunc, CurrBB&: *CurrBB, BB, TTI,
158 DTU: DTU ? &*DTU : nullptr, ORE))
159 break;
160 continue;
161 default:
162 continue;
163 }
164
165 Changed = true;
166 break;
167 }
168 }
169
170 return Changed;
171}
172
173PreservedAnalyses
174PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) {
175 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
176 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
177 auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(IR&: F);
178 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
179 if (!runPartiallyInlineLibCalls(F, TLI: &TLI, TTI: &TTI, DT, ORE: &ORE))
180 return PreservedAnalyses::all();
181 PreservedAnalyses PA;
182 PA.preserve<DominatorTreeAnalysis>();
183 return PA;
184}
185
186namespace {
187class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
188public:
189 static char ID;
190
191 PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) {
192 initializePartiallyInlineLibCallsLegacyPassPass(
193 *PassRegistry::getPassRegistry());
194 }
195
196 void getAnalysisUsage(AnalysisUsage &AU) const override {
197 AU.addRequired<TargetLibraryInfoWrapperPass>();
198 AU.addRequired<TargetTransformInfoWrapperPass>();
199 AU.addPreserved<DominatorTreeWrapperPass>();
200 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
201 FunctionPass::getAnalysisUsage(AU);
202 }
203
204 bool runOnFunction(Function &F) override {
205 if (skipFunction(F))
206 return false;
207
208 TargetLibraryInfo *TLI =
209 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
210 const TargetTransformInfo *TTI =
211 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
212 DominatorTree *DT = nullptr;
213 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
214 DT = &DTWP->getDomTree();
215 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
216 return runPartiallyInlineLibCalls(F, TLI, TTI, DT, ORE);
217 }
218};
219}
220
// Storage for the legacy pass's static ID member, which the constructor hands
// to FunctionPass.
char PartiallyInlineLibCallsLegacyPass::ID = 0;
// Registration of the legacy pass under the name "partially-inline-libcalls",
// together with the wrapper passes it lists as dependencies.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS - confirm against
// PassSupport.h.
INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
                      "partially-inline-libcalls",
                      "Partially inline calls to library functions", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
                    "partially-inline-libcalls",
                    "Partially inline calls to library functions", false, false)
233
/// Factory for the legacy pass manager: returns a newly allocated
/// PartiallyInlineLibCallsLegacyPass.
/// NOTE(review): the returned pointer is presumably owned by whichever pass
/// manager it is added to - confirm at call sites.
FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
  return new PartiallyInlineLibCallsLegacyPass();
}
237