1 | //===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass tries to partially inline the fast path of well-known library |
10 | // functions, such as using square-root instructions for cases where sqrt() |
11 | // does not need to set errno. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" |
16 | #include "llvm/Analysis/DomTreeUpdater.h" |
17 | #include "llvm/Analysis/TargetLibraryInfo.h" |
18 | #include "llvm/Analysis/TargetTransformInfo.h" |
19 | #include "llvm/IR/Dominators.h" |
20 | #include "llvm/IR/IRBuilder.h" |
21 | #include "llvm/InitializePasses.h" |
22 | #include "llvm/Support/DebugCounter.h" |
23 | #include "llvm/Transforms/Scalar.h" |
24 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
25 | #include <optional> |
26 | |
27 | using namespace llvm; |
28 | |
29 | #define DEBUG_TYPE "partially-inline-libcalls" |
30 | |
// Debug counter for this pass. optimizeSQRT() queries it through
// DebugCounter::shouldExecute() and leaves the call untouched when the counter
// says the transformation should be skipped.
31 | DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform" , |
32 | "Controls transformations in partially-inline-libcalls" ); |
33 | |
34 | static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, |
35 | BasicBlock &CurrBB, Function::iterator &BB, |
36 | const TargetTransformInfo *TTI, DomTreeUpdater *DTU) { |
37 | // There is no need to change the IR, since backend will emit sqrt |
38 | // instruction if the call has already been marked read-only. |
39 | if (Call->onlyReadsMemory()) |
40 | return false; |
41 | |
42 | if (!DebugCounter::shouldExecute(CounterName: PILCounter)) |
43 | return false; |
44 | |
45 | // Do the following transformation: |
46 | // |
47 | // (before) |
48 | // dst = sqrt(src) |
49 | // |
50 | // (after) |
51 | // v0 = sqrt_noreadmem(src) # native sqrt instruction. |
52 | // [if (v0 is a NaN) || if (src < 0)] |
53 | // v1 = sqrt(src) # library call. |
54 | // dst = phi(v0, v1) |
55 | // |
56 | |
57 | Type *Ty = Call->getType(); |
58 | IRBuilder<> Builder(Call->getNextNode()); |
59 | |
60 | // Split CurrBB right after the call, create a 'then' block (that branches |
61 | // back to split-off tail of CurrBB) into which we'll insert a libcall. |
62 | Instruction *LibCallTerm = SplitBlockAndInsertIfThen( |
63 | Cond: Builder.getTrue(), SplitBefore: Call->getNextNode(), /*Unreachable=*/false, |
64 | /*BranchWeights*/ nullptr, DTU); |
65 | |
66 | auto *CurrBBTerm = cast<BranchInst>(Val: CurrBB.getTerminator()); |
67 | // We want an 'else' block though, not a 'then' block. |
68 | cast<BranchInst>(Val: CurrBBTerm)->swapSuccessors(); |
69 | |
70 | // Create phi that will merge results of either sqrt and replace all uses. |
71 | BasicBlock *JoinBB = LibCallTerm->getSuccessor(Idx: 0); |
72 | JoinBB->setName(CurrBB.getName() + ".split" ); |
73 | Builder.SetInsertPoint(TheBB: JoinBB, IP: JoinBB->begin()); |
74 | PHINode *Phi = Builder.CreatePHI(Ty, NumReservedValues: 2); |
75 | Call->replaceAllUsesWith(V: Phi); |
76 | |
77 | // Finally, insert the libcall into 'else' block. |
78 | BasicBlock *LibCallBB = LibCallTerm->getParent(); |
79 | LibCallBB->setName("call.sqrt" ); |
80 | Builder.SetInsertPoint(LibCallTerm); |
81 | Instruction *LibCall = Call->clone(); |
82 | Builder.Insert(I: LibCall); |
83 | |
84 | // Add memory(none) attribute, so that the backend can use a native sqrt |
85 | // instruction for this call. |
86 | Call->setDoesNotAccessMemory(); |
87 | |
88 | // Insert a FP compare instruction and use it as the CurrBB branch condition. |
89 | Builder.SetInsertPoint(CurrBBTerm); |
90 | Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty) |
91 | ? Builder.CreateFCmpORD(LHS: Call, RHS: Call) |
92 | : Builder.CreateFCmpOGE(LHS: Call->getOperand(i_nocapture: 0), |
93 | RHS: ConstantFP::get(Ty, V: 0.0)); |
94 | CurrBBTerm->setCondition(FCmp); |
95 | |
96 | // Add phi operands. |
97 | Phi->addIncoming(V: Call, BB: &CurrBB); |
98 | Phi->addIncoming(V: LibCall, BB: LibCallBB); |
99 | |
100 | BB = JoinBB->getIterator(); |
101 | return true; |
102 | } |
103 | |
104 | static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI, |
105 | const TargetTransformInfo *TTI, |
106 | DominatorTree *DT) { |
107 | std::optional<DomTreeUpdater> DTU; |
108 | if (DT) |
109 | DTU.emplace(args&: DT, args: DomTreeUpdater::UpdateStrategy::Lazy); |
110 | |
111 | bool Changed = false; |
112 | |
113 | Function::iterator CurrBB; |
114 | for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) { |
115 | CurrBB = BB++; |
116 | |
117 | for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end(); |
118 | II != IE; ++II) { |
119 | CallInst *Call = dyn_cast<CallInst>(Val: &*II); |
120 | Function *CalledFunc; |
121 | |
122 | if (!Call || !(CalledFunc = Call->getCalledFunction())) |
123 | continue; |
124 | |
125 | if (Call->isNoBuiltin() || Call->isStrictFP()) |
126 | continue; |
127 | |
128 | if (Call->isMustTailCall()) |
129 | continue; |
130 | |
131 | // Skip if function either has local linkage or is not a known library |
132 | // function. |
133 | LibFunc LF; |
134 | if (CalledFunc->hasLocalLinkage() || |
135 | !TLI->getLibFunc(FDecl: *CalledFunc, F&: LF) || !TLI->has(F: LF)) |
136 | continue; |
137 | |
138 | switch (LF) { |
139 | case LibFunc_sqrtf: |
140 | case LibFunc_sqrt: |
141 | if (TTI->haveFastSqrt(Ty: Call->getType()) && |
142 | optimizeSQRT(Call, CalledFunc, CurrBB&: *CurrBB, BB, TTI, |
143 | DTU: DTU ? &*DTU : nullptr)) |
144 | break; |
145 | continue; |
146 | default: |
147 | continue; |
148 | } |
149 | |
150 | Changed = true; |
151 | break; |
152 | } |
153 | } |
154 | |
155 | return Changed; |
156 | } |
157 | |
158 | PreservedAnalyses |
159 | PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) { |
160 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
161 | auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F); |
162 | auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(IR&: F); |
163 | if (!runPartiallyInlineLibCalls(F, TLI: &TLI, TTI: &TTI, DT)) |
164 | return PreservedAnalyses::all(); |
165 | PreservedAnalyses PA; |
166 | PA.preserve<DominatorTreeAnalysis>(); |
167 | return PA; |
168 | } |
169 | |
170 | namespace { |
171 | class PartiallyInlineLibCallsLegacyPass : public FunctionPass { |
172 | public: |
173 | static char ID; |
174 | |
175 | PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) { |
176 | initializePartiallyInlineLibCallsLegacyPassPass( |
177 | *PassRegistry::getPassRegistry()); |
178 | } |
179 | |
180 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
181 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
182 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
183 | AU.addPreserved<DominatorTreeWrapperPass>(); |
184 | FunctionPass::getAnalysisUsage(AU); |
185 | } |
186 | |
187 | bool runOnFunction(Function &F) override { |
188 | if (skipFunction(F)) |
189 | return false; |
190 | |
191 | TargetLibraryInfo *TLI = |
192 | &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
193 | const TargetTransformInfo *TTI = |
194 | &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
195 | DominatorTree *DT = nullptr; |
196 | if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) |
197 | DT = &DTWP->getDomTree(); |
198 | return runPartiallyInlineLibCalls(F, TLI, TTI, DT); |
199 | } |
200 | }; |
201 | } |
202 | |
// Out-of-line definition of the legacy pass's static ID member.
203 | char PartiallyInlineLibCallsLegacyPass::ID = 0; |
// Legacy pass registration under the name "partially-inline-libcalls", with
// the analysis wrapper passes listed below declared as dependencies.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against the INITIALIZE_PASS macro
// definition.
204 | INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass, |
205 | "partially-inline-libcalls" , |
206 | "Partially inline calls to library functions" , false, |
207 | false) |
208 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
// The dominator tree is listed as a dependency here, although
// getAnalysisUsage() only marks it preserved and runOnFunction() fetches it
// with getAnalysisIfAvailable().
209 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
210 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
211 | INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass, |
212 | "partially-inline-libcalls" , |
213 | "Partially inline calls to library functions" , false, false) |
214 | |
215 | FunctionPass *llvm::createPartiallyInlineLibCallsPass() { |
216 | return new PartiallyInlineLibCallsLegacyPass(); |
217 | } |
218 | |