1 | //===- ReduceOpcodes.cpp - Specialized Delta Pass -------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Try to replace instructions that are likely to codegen to simpler or smaller |
10 | // sequences. This is a fuzzy and target specific concept. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "ReduceOpcodes.h" |
15 | #include "llvm/IR/IRBuilder.h" |
16 | #include "llvm/IR/Instructions.h" |
17 | #include "llvm/IR/IntrinsicInst.h" |
18 | #include "llvm/IR/Intrinsics.h" |
19 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
20 | |
21 | using namespace llvm; |
22 | |
23 | // Assume outgoing undef arguments aren't relevant. |
24 | // TODO: Maybe skip any trivial constant arguments. |
25 | static bool shouldIgnoreArgument(const Value *V) { |
26 | return isa<UndefValue>(Val: V); |
27 | } |
28 | |
29 | static Value *replaceIntrinsic(Module &M, IntrinsicInst *II, |
30 | Intrinsic::ID NewIID, |
31 | ArrayRef<Type *> Tys = {}) { |
32 | Function *NewFunc = Intrinsic::getOrInsertDeclaration(M: &M, id: NewIID, Tys); |
33 | II->setCalledFunction(NewFunc); |
34 | return II; |
35 | } |
36 | |
37 | static Value *reduceIntrinsic(Oracle &O, Module &M, IntrinsicInst *II) { |
38 | IRBuilder<> B(II); |
39 | switch (II->getIntrinsicID()) { |
40 | case Intrinsic::sqrt: |
41 | if (O.shouldKeep()) |
42 | return nullptr; |
43 | |
44 | return B.CreateFMul(L: II->getArgOperand(i: 0), |
45 | R: ConstantFP::get(Ty: II->getType(), V: 2.0)); |
46 | case Intrinsic::minnum: |
47 | case Intrinsic::maxnum: |
48 | case Intrinsic::minimum: |
49 | case Intrinsic::maximum: |
50 | case Intrinsic::amdgcn_fmul_legacy: |
51 | if (O.shouldKeep()) |
52 | return nullptr; |
53 | return B.CreateFMul(L: II->getArgOperand(i: 0), R: II->getArgOperand(i: 1)); |
54 | case Intrinsic::amdgcn_workitem_id_y: |
55 | case Intrinsic::amdgcn_workitem_id_z: |
56 | if (O.shouldKeep()) |
57 | return nullptr; |
58 | return replaceIntrinsic(M, II, NewIID: Intrinsic::amdgcn_workitem_id_x); |
59 | case Intrinsic::amdgcn_workgroup_id_y: |
60 | case Intrinsic::amdgcn_workgroup_id_z: |
61 | if (O.shouldKeep()) |
62 | return nullptr; |
63 | return replaceIntrinsic(M, II, NewIID: Intrinsic::amdgcn_workgroup_id_x); |
64 | case Intrinsic::amdgcn_div_fixup: |
65 | case Intrinsic::amdgcn_fma_legacy: |
66 | if (O.shouldKeep()) |
67 | return nullptr; |
68 | return replaceIntrinsic(M, II, NewIID: Intrinsic::fma, Tys: {II->getType()}); |
69 | default: |
70 | return nullptr; |
71 | } |
72 | } |
73 | |
74 | /// Look for calls that look like they could be replaced with a load or store. |
75 | static bool callLooksLikeLoadStore(CallBase *CB, Value *&DataArg, |
76 | Value *&PtrArg) { |
77 | const bool IsStore = CB->getType()->isVoidTy(); |
78 | |
79 | PtrArg = nullptr; |
80 | DataArg = nullptr; |
81 | for (Value *Arg : CB->args()) { |
82 | if (shouldIgnoreArgument(V: Arg)) |
83 | continue; |
84 | |
85 | if (!Arg->getType()->isSized()) |
86 | return false; |
87 | |
88 | if (!PtrArg && Arg->getType()->isPointerTy()) { |
89 | PtrArg = Arg; |
90 | continue; |
91 | } |
92 | |
93 | if (!IsStore || DataArg) |
94 | return false; |
95 | |
96 | DataArg = Arg; |
97 | } |
98 | |
99 | if (IsStore && !DataArg) { |
100 | // FIXME: For typed pointers, use element type? |
101 | DataArg = ConstantInt::get(Ty: IntegerType::getInt32Ty(C&: CB->getContext()), V: 0); |
102 | } |
103 | |
104 | // If we didn't find any arguments, we can fill in the pointer. |
105 | if (!PtrArg) { |
106 | unsigned AS = CB->getDataLayout().getAllocaAddrSpace(); |
107 | |
108 | PointerType *PtrTy = PointerType::get(C&: CB->getContext(), AddressSpace: AS); |
109 | |
110 | PtrArg = ConstantPointerNull::get(T: PtrTy); |
111 | } |
112 | |
113 | return true; |
114 | } |
115 | |
116 | // TODO: Replace 2 pointer argument calls with memcpy |
117 | static Value *tryReplaceCallWithLoadStore(Oracle &O, Module &M, CallBase *CB) { |
118 | Value *PtrArg = nullptr; |
119 | Value *DataArg = nullptr; |
120 | if (!callLooksLikeLoadStore(CB, DataArg, PtrArg) || O.shouldKeep()) |
121 | return nullptr; |
122 | |
123 | IRBuilder<> B(CB); |
124 | if (DataArg) |
125 | return B.CreateStore(Val: DataArg, Ptr: PtrArg, isVolatile: true); |
126 | return B.CreateLoad(Ty: CB->getType(), Ptr: PtrArg, isVolatile: true); |
127 | } |
128 | |
129 | static bool callLooksLikeOperator(CallBase *CB, |
130 | SmallVectorImpl<Value *> &OperatorArgs) { |
131 | Type *ReturnTy = CB->getType(); |
132 | if (!ReturnTy->isFirstClassType()) |
133 | return false; |
134 | |
135 | for (Value *Arg : CB->args()) { |
136 | if (shouldIgnoreArgument(V: Arg)) |
137 | continue; |
138 | |
139 | if (Arg->getType() != ReturnTy) |
140 | return false; |
141 | |
142 | OperatorArgs.push_back(Elt: Arg); |
143 | } |
144 | |
145 | return true; |
146 | } |
147 | |
148 | static Value *tryReplaceCallWithOperator(Oracle &O, Module &M, CallBase *CB) { |
149 | SmallVector<Value *, 4> Arguments; |
150 | |
151 | if (!callLooksLikeOperator(CB, OperatorArgs&: Arguments) || Arguments.size() > 3) |
152 | return nullptr; |
153 | |
154 | if (O.shouldKeep()) |
155 | return nullptr; |
156 | |
157 | IRBuilder<> B(CB); |
158 | if (CB->getType()->isFPOrFPVectorTy()) { |
159 | switch (Arguments.size()) { |
160 | case 1: |
161 | return B.CreateFNeg(V: Arguments[0]); |
162 | case 2: |
163 | return B.CreateFMul(L: Arguments[0], R: Arguments[1]); |
164 | case 3: |
165 | return B.CreateIntrinsic(ID: Intrinsic::fma, Types: {CB->getType()}, Args: Arguments); |
166 | default: |
167 | return nullptr; |
168 | } |
169 | |
170 | llvm_unreachable("all argument sizes handled" ); |
171 | } |
172 | |
173 | if (CB->getType()->isIntOrIntVectorTy()) { |
174 | switch (Arguments.size()) { |
175 | case 1: |
176 | return B.CreateUnaryIntrinsic(ID: Intrinsic::bswap, V: Arguments[0]); |
177 | case 2: |
178 | return B.CreateAnd(LHS: Arguments[0], RHS: Arguments[1]); |
179 | case 3: |
180 | return B.CreateIntrinsic(ID: Intrinsic::fshl, Types: {CB->getType()}, Args: Arguments); |
181 | default: |
182 | return nullptr; |
183 | } |
184 | |
185 | llvm_unreachable("all argument sizes handled" ); |
186 | } |
187 | |
188 | return nullptr; |
189 | } |
190 | |
191 | static Value *reduceInstruction(Oracle &O, Module &M, Instruction &I) { |
192 | IRBuilder<> B(&I); |
193 | |
194 | // TODO: fp binary operator with constant to fneg |
195 | switch (I.getOpcode()) { |
196 | case Instruction::FDiv: |
197 | case Instruction::FRem: |
198 | if (O.shouldKeep()) |
199 | return nullptr; |
200 | |
201 | // Divisions tends to codegen into a long sequence or a library call. |
202 | return B.CreateFMul(L: I.getOperand(i: 0), R: I.getOperand(i: 1)); |
203 | case Instruction::UDiv: |
204 | case Instruction::SDiv: |
205 | case Instruction::URem: |
206 | case Instruction::SRem: |
207 | if (O.shouldKeep()) |
208 | return nullptr; |
209 | |
210 | // Divisions tends to codegen into a long sequence or a library call. |
211 | return B.CreateMul(LHS: I.getOperand(i: 0), RHS: I.getOperand(i: 1)); |
212 | case Instruction::Add: |
213 | case Instruction::Sub: { |
214 | if (O.shouldKeep()) |
215 | return nullptr; |
216 | |
217 | // Add/sub are more likely codegen to instructions with carry out side |
218 | // effects. |
219 | return B.CreateOr(LHS: I.getOperand(i: 0), RHS: I.getOperand(i: 1)); |
220 | } |
221 | case Instruction::Call: { |
222 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &I)) |
223 | return reduceIntrinsic(O, M, II); |
224 | |
225 | CallBase *CB = cast<CallBase>(Val: &I); |
226 | |
227 | if (Value *NewOp = tryReplaceCallWithOperator(O, M, CB)) |
228 | return NewOp; |
229 | |
230 | if (Value *NewOp = tryReplaceCallWithLoadStore(O, M, CB)) |
231 | return NewOp; |
232 | |
233 | return nullptr; |
234 | } |
235 | default: |
236 | return nullptr; |
237 | } |
238 | |
239 | return nullptr; |
240 | } |
241 | |
242 | void llvm::reduceOpcodesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { |
243 | Module &Mod = WorkItem.getModule(); |
244 | |
245 | for (Function &F : Mod) { |
246 | for (BasicBlock &BB : F) |
247 | for (Instruction &I : make_early_inc_range(Range&: BB)) { |
248 | Instruction *Replacement = |
249 | dyn_cast_or_null<Instruction>(Val: reduceInstruction(O, M&: Mod, I)); |
250 | if (Replacement && Replacement != &I) { |
251 | if (isa<FPMathOperator>(Val: Replacement)) |
252 | Replacement->copyFastMathFlags(I: &I); |
253 | |
254 | Replacement->copyIRFlags(V: &I); |
255 | Replacement->copyMetadata(SrcInst: I); |
256 | Replacement->takeName(V: &I); |
257 | I.replaceAllUsesWith(V: Replacement); |
258 | I.eraseFromParent(); |
259 | } |
260 | } |
261 | } |
262 | } |
263 | |