//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass tries to partially inline the fast path of well-known library
// functions, such as using square-root instructions for cases where sqrt()
// does not need to set errno.
//
//===----------------------------------------------------------------------===//
14 | |
15 | #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" |
16 | #include "llvm/Analysis/DomTreeUpdater.h" |
17 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
18 | #include "llvm/Analysis/TargetLibraryInfo.h" |
19 | #include "llvm/Analysis/TargetTransformInfo.h" |
20 | #include "llvm/IR/Dominators.h" |
21 | #include "llvm/IR/IRBuilder.h" |
22 | #include "llvm/InitializePasses.h" |
23 | #include "llvm/Support/DebugCounter.h" |
24 | #include "llvm/Transforms/Scalar.h" |
25 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
26 | #include <optional> |
27 | |
28 | using namespace llvm; |
29 | |
30 | #define DEBUG_TYPE "partially-inline-libcalls" |
31 | |
32 | DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform" , |
33 | "Controls transformations in partially-inline-libcalls" ); |
34 | |
35 | static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, |
36 | BasicBlock &CurrBB, Function::iterator &BB, |
37 | const TargetTransformInfo *TTI, DomTreeUpdater *DTU, |
38 | OptimizationRemarkEmitter *ORE) { |
39 | // There is no need to change the IR, since backend will emit sqrt |
40 | // instruction if the call has already been marked read-only. |
41 | if (Call->onlyReadsMemory()) |
42 | return false; |
43 | |
44 | if (!DebugCounter::shouldExecute(CounterName: PILCounter)) |
45 | return false; |
46 | |
47 | // Do the following transformation: |
48 | // |
49 | // (before) |
50 | // dst = sqrt(src) |
51 | // |
52 | // (after) |
53 | // v0 = sqrt_noreadmem(src) # native sqrt instruction. |
54 | // [if (v0 is a NaN) || if (src < 0)] |
55 | // v1 = sqrt(src) # library call. |
56 | // dst = phi(v0, v1) |
57 | // |
58 | |
59 | Type *Ty = Call->getType(); |
60 | IRBuilder<> Builder(Call->getNextNode()); |
61 | |
62 | // Split CurrBB right after the call, create a 'then' block (that branches |
63 | // back to split-off tail of CurrBB) into which we'll insert a libcall. |
64 | Instruction *LibCallTerm = SplitBlockAndInsertIfThen( |
65 | Cond: Builder.getTrue(), SplitBefore: Call->getNextNode(), /*Unreachable=*/false, |
66 | /*BranchWeights*/ nullptr, DTU); |
67 | |
68 | auto *CurrBBTerm = cast<BranchInst>(Val: CurrBB.getTerminator()); |
69 | // We want an 'else' block though, not a 'then' block. |
70 | cast<BranchInst>(Val: CurrBBTerm)->swapSuccessors(); |
71 | |
72 | // Create phi that will merge results of either sqrt and replace all uses. |
73 | BasicBlock *JoinBB = LibCallTerm->getSuccessor(Idx: 0); |
74 | JoinBB->setName(CurrBB.getName() + ".split" ); |
75 | Builder.SetInsertPoint(TheBB: JoinBB, IP: JoinBB->begin()); |
76 | PHINode *Phi = Builder.CreatePHI(Ty, NumReservedValues: 2); |
77 | Call->replaceAllUsesWith(V: Phi); |
78 | |
79 | // Finally, insert the libcall into 'else' block. |
80 | BasicBlock *LibCallBB = LibCallTerm->getParent(); |
81 | LibCallBB->setName("call.sqrt" ); |
82 | Builder.SetInsertPoint(LibCallTerm); |
83 | Instruction *LibCall = Call->clone(); |
84 | Builder.Insert(I: LibCall); |
85 | |
86 | // Add memory(none) attribute, so that the backend can use a native sqrt |
87 | // instruction for this call. |
88 | Call->setDoesNotAccessMemory(); |
89 | |
90 | // Insert a FP compare instruction and use it as the CurrBB branch condition. |
91 | Builder.SetInsertPoint(CurrBBTerm); |
92 | Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty) |
93 | ? Builder.CreateFCmpORD(LHS: Call, RHS: Call) |
94 | : Builder.CreateFCmpOGE(LHS: Call->getOperand(i_nocapture: 0), |
95 | RHS: ConstantFP::get(Ty, V: 0.0)); |
96 | CurrBBTerm->setCondition(FCmp); |
97 | |
98 | // Add phi operands. |
99 | Phi->addIncoming(V: Call, BB: &CurrBB); |
100 | Phi->addIncoming(V: LibCall, BB: LibCallBB); |
101 | |
102 | BB = JoinBB->getIterator(); |
103 | return true; |
104 | } |
105 | |
106 | static bool (Function &F, TargetLibraryInfo *TLI, |
107 | const TargetTransformInfo *TTI, |
108 | DominatorTree *DT, |
109 | OptimizationRemarkEmitter *ORE) { |
110 | std::optional<DomTreeUpdater> DTU; |
111 | if (DT) |
112 | DTU.emplace(args&: DT, args: DomTreeUpdater::UpdateStrategy::Lazy); |
113 | |
114 | bool Changed = false; |
115 | |
116 | Function::iterator CurrBB; |
117 | for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) { |
118 | CurrBB = BB++; |
119 | |
120 | for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end(); |
121 | II != IE; ++II) { |
122 | CallInst *Call = dyn_cast<CallInst>(Val: &*II); |
123 | Function *CalledFunc; |
124 | |
125 | if (!Call || !(CalledFunc = Call->getCalledFunction())) |
126 | continue; |
127 | |
128 | if (Call->isNoBuiltin() || Call->isStrictFP()) |
129 | continue; |
130 | |
131 | if (Call->isMustTailCall()) |
132 | continue; |
133 | |
134 | // Skip if function either has local linkage or is not a known library |
135 | // function. |
136 | LibFunc LF; |
137 | if (CalledFunc->hasLocalLinkage() || |
138 | !TLI->getLibFunc(FDecl: *CalledFunc, F&: LF) || !TLI->has(F: LF)) |
139 | continue; |
140 | |
141 | switch (LF) { |
142 | case LibFunc_sqrtf: |
143 | case LibFunc_sqrt: |
144 | if (TTI->haveFastSqrt(Ty: Call->getType()) && |
145 | optimizeSQRT(Call, CalledFunc, CurrBB&: *CurrBB, BB, TTI, |
146 | DTU: DTU ? &*DTU : nullptr, ORE)) |
147 | break; |
148 | continue; |
149 | default: |
150 | continue; |
151 | } |
152 | |
153 | Changed = true; |
154 | break; |
155 | } |
156 | } |
157 | |
158 | return Changed; |
159 | } |
160 | |
161 | PreservedAnalyses |
162 | PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) { |
163 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
164 | auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F); |
165 | auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(IR&: F); |
166 | auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
167 | if (!runPartiallyInlineLibCalls(F, TLI: &TLI, TTI: &TTI, DT, ORE: &ORE)) |
168 | return PreservedAnalyses::all(); |
169 | PreservedAnalyses PA; |
170 | PA.preserve<DominatorTreeAnalysis>(); |
171 | return PA; |
172 | } |
173 | |
174 | namespace { |
175 | class PartiallyInlineLibCallsLegacyPass : public FunctionPass { |
176 | public: |
177 | static char ID; |
178 | |
179 | PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) { |
180 | initializePartiallyInlineLibCallsLegacyPassPass( |
181 | *PassRegistry::getPassRegistry()); |
182 | } |
183 | |
184 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
185 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
186 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
187 | AU.addPreserved<DominatorTreeWrapperPass>(); |
188 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
189 | FunctionPass::getAnalysisUsage(AU); |
190 | } |
191 | |
192 | bool runOnFunction(Function &F) override { |
193 | if (skipFunction(F)) |
194 | return false; |
195 | |
196 | TargetLibraryInfo *TLI = |
197 | &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
198 | const TargetTransformInfo *TTI = |
199 | &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
200 | DominatorTree *DT = nullptr; |
201 | if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) |
202 | DT = &DTWP->getDomTree(); |
203 | auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
204 | return runPartiallyInlineLibCalls(F, TLI, TTI, DT, ORE); |
205 | } |
206 | }; |
207 | } |
208 | |
209 | char PartiallyInlineLibCallsLegacyPass::ID = 0; |
210 | INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass, |
211 | "partially-inline-libcalls" , |
212 | "Partially inline calls to library functions" , false, |
213 | false) |
214 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
215 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
216 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
217 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) |
218 | INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass, |
219 | "partially-inline-libcalls" , |
220 | "Partially inline calls to library functions" , false, false) |
221 | |
222 | FunctionPass *llvm::createPartiallyInlineLibCallsPass() { |
223 | return new PartiallyInlineLibCallsLegacyPass(); |
224 | } |
225 | |