1 | //===- ReduceOpcodes.cpp - Specialized Delta Pass -------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Try to replace instructions that are likely to codegen to simpler or smaller |
10 | // sequences. This is a fuzzy and target specific concept. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "ReduceOpcodes.h" |
15 | #include "Delta.h" |
16 | #include "llvm/IR/IRBuilder.h" |
17 | #include "llvm/IR/Instructions.h" |
18 | #include "llvm/IR/IntrinsicInst.h" |
19 | #include "llvm/IR/Intrinsics.h" |
20 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
21 | |
22 | using namespace llvm; |
23 | |
24 | // Assume outgoing undef arguments aren't relevant. |
25 | // TODO: Maybe skip any trivial constant arguments. |
26 | static bool shouldIgnoreArgument(const Value *V) { |
27 | return isa<UndefValue>(Val: V); |
28 | } |
29 | |
30 | static Value *replaceIntrinsic(Module &M, IntrinsicInst *II, |
31 | Intrinsic::ID NewIID, |
32 | ArrayRef<Type *> Tys = std::nullopt) { |
33 | Function *NewFunc = Intrinsic::getDeclaration(M: &M, id: NewIID, Tys); |
34 | II->setCalledFunction(NewFunc); |
35 | return II; |
36 | } |
37 | |
38 | static Value *reduceIntrinsic(Oracle &O, Module &M, IntrinsicInst *II) { |
39 | IRBuilder<> B(II); |
40 | switch (II->getIntrinsicID()) { |
41 | case Intrinsic::sqrt: |
42 | if (O.shouldKeep()) |
43 | return nullptr; |
44 | |
45 | return B.CreateFMul(L: II->getArgOperand(i: 0), |
46 | R: ConstantFP::get(Ty: II->getType(), V: 2.0)); |
47 | case Intrinsic::minnum: |
48 | case Intrinsic::maxnum: |
49 | case Intrinsic::minimum: |
50 | case Intrinsic::maximum: |
51 | case Intrinsic::amdgcn_fmul_legacy: |
52 | if (O.shouldKeep()) |
53 | return nullptr; |
54 | return B.CreateFMul(L: II->getArgOperand(i: 0), R: II->getArgOperand(i: 1)); |
55 | case Intrinsic::amdgcn_workitem_id_y: |
56 | case Intrinsic::amdgcn_workitem_id_z: |
57 | if (O.shouldKeep()) |
58 | return nullptr; |
59 | return replaceIntrinsic(M, II, NewIID: Intrinsic::amdgcn_workitem_id_x); |
60 | case Intrinsic::amdgcn_workgroup_id_y: |
61 | case Intrinsic::amdgcn_workgroup_id_z: |
62 | if (O.shouldKeep()) |
63 | return nullptr; |
64 | return replaceIntrinsic(M, II, NewIID: Intrinsic::amdgcn_workgroup_id_x); |
65 | case Intrinsic::amdgcn_div_fixup: |
66 | case Intrinsic::amdgcn_fma_legacy: |
67 | if (O.shouldKeep()) |
68 | return nullptr; |
69 | return replaceIntrinsic(M, II, NewIID: Intrinsic::fma, Tys: {II->getType()}); |
70 | default: |
71 | return nullptr; |
72 | } |
73 | } |
74 | |
75 | /// Look for calls that look like they could be replaced with a load or store. |
76 | static bool callLooksLikeLoadStore(CallBase *CB, Value *&DataArg, |
77 | Value *&PtrArg) { |
78 | const bool IsStore = CB->getType()->isVoidTy(); |
79 | |
80 | PtrArg = nullptr; |
81 | DataArg = nullptr; |
82 | for (Value *Arg : CB->args()) { |
83 | if (shouldIgnoreArgument(V: Arg)) |
84 | continue; |
85 | |
86 | if (!Arg->getType()->isSized()) |
87 | return false; |
88 | |
89 | if (!PtrArg && Arg->getType()->isPointerTy()) { |
90 | PtrArg = Arg; |
91 | continue; |
92 | } |
93 | |
94 | if (!IsStore || DataArg) |
95 | return false; |
96 | |
97 | DataArg = Arg; |
98 | } |
99 | |
100 | if (IsStore && !DataArg) { |
101 | // FIXME: For typed pointers, use element type? |
102 | DataArg = ConstantInt::get(Ty: IntegerType::getInt32Ty(C&: CB->getContext()), V: 0); |
103 | } |
104 | |
105 | // If we didn't find any arguments, we can fill in the pointer. |
106 | if (!PtrArg) { |
107 | unsigned AS = CB->getDataLayout().getAllocaAddrSpace(); |
108 | |
109 | PointerType *PtrTy = |
110 | PointerType::get(ElementType: DataArg ? DataArg->getType() |
111 | : IntegerType::getInt32Ty(C&: CB->getContext()), |
112 | AddressSpace: AS); |
113 | |
114 | PtrArg = ConstantPointerNull::get(T: PtrTy); |
115 | } |
116 | |
117 | return true; |
118 | } |
119 | |
120 | // TODO: Replace 2 pointer argument calls with memcpy |
121 | static Value *tryReplaceCallWithLoadStore(Oracle &O, Module &M, CallBase *CB) { |
122 | Value *PtrArg = nullptr; |
123 | Value *DataArg = nullptr; |
124 | if (!callLooksLikeLoadStore(CB, DataArg, PtrArg) || O.shouldKeep()) |
125 | return nullptr; |
126 | |
127 | IRBuilder<> B(CB); |
128 | if (DataArg) |
129 | return B.CreateStore(Val: DataArg, Ptr: PtrArg, isVolatile: true); |
130 | return B.CreateLoad(Ty: CB->getType(), Ptr: PtrArg, isVolatile: true); |
131 | } |
132 | |
133 | static bool callLooksLikeOperator(CallBase *CB, |
134 | SmallVectorImpl<Value *> &OperatorArgs) { |
135 | Type *ReturnTy = CB->getType(); |
136 | if (!ReturnTy->isFirstClassType()) |
137 | return false; |
138 | |
139 | for (Value *Arg : CB->args()) { |
140 | if (shouldIgnoreArgument(V: Arg)) |
141 | continue; |
142 | |
143 | if (Arg->getType() != ReturnTy) |
144 | return false; |
145 | |
146 | OperatorArgs.push_back(Elt: Arg); |
147 | } |
148 | |
149 | return true; |
150 | } |
151 | |
152 | static Value *tryReplaceCallWithOperator(Oracle &O, Module &M, CallBase *CB) { |
153 | SmallVector<Value *, 4> Arguments; |
154 | |
155 | if (!callLooksLikeOperator(CB, OperatorArgs&: Arguments) || Arguments.size() > 3) |
156 | return nullptr; |
157 | |
158 | if (O.shouldKeep()) |
159 | return nullptr; |
160 | |
161 | IRBuilder<> B(CB); |
162 | if (CB->getType()->isFPOrFPVectorTy()) { |
163 | switch (Arguments.size()) { |
164 | case 1: |
165 | return B.CreateFNeg(V: Arguments[0]); |
166 | case 2: |
167 | return B.CreateFMul(L: Arguments[0], R: Arguments[1]); |
168 | case 3: |
169 | return B.CreateIntrinsic(ID: Intrinsic::fma, Types: {CB->getType()}, Args: Arguments); |
170 | default: |
171 | return nullptr; |
172 | } |
173 | |
174 | llvm_unreachable("all argument sizes handled" ); |
175 | } |
176 | |
177 | if (CB->getType()->isIntOrIntVectorTy()) { |
178 | switch (Arguments.size()) { |
179 | case 1: |
180 | return B.CreateUnaryIntrinsic(ID: Intrinsic::bswap, V: Arguments[0]); |
181 | case 2: |
182 | return B.CreateAnd(LHS: Arguments[0], RHS: Arguments[1]); |
183 | case 3: |
184 | return B.CreateIntrinsic(ID: Intrinsic::fshl, Types: {CB->getType()}, Args: Arguments); |
185 | default: |
186 | return nullptr; |
187 | } |
188 | |
189 | llvm_unreachable("all argument sizes handled" ); |
190 | } |
191 | |
192 | return nullptr; |
193 | } |
194 | |
195 | static Value *reduceInstruction(Oracle &O, Module &M, Instruction &I) { |
196 | IRBuilder<> B(&I); |
197 | |
198 | // TODO: fp binary operator with constant to fneg |
199 | switch (I.getOpcode()) { |
200 | case Instruction::FDiv: |
201 | case Instruction::FRem: |
202 | if (O.shouldKeep()) |
203 | return nullptr; |
204 | |
205 | // Divisions tends to codegen into a long sequence or a library call. |
206 | return B.CreateFMul(L: I.getOperand(i: 0), R: I.getOperand(i: 1)); |
207 | case Instruction::UDiv: |
208 | case Instruction::SDiv: |
209 | case Instruction::URem: |
210 | case Instruction::SRem: |
211 | if (O.shouldKeep()) |
212 | return nullptr; |
213 | |
214 | // Divisions tends to codegen into a long sequence or a library call. |
215 | return B.CreateMul(LHS: I.getOperand(i: 0), RHS: I.getOperand(i: 1)); |
216 | case Instruction::Add: |
217 | case Instruction::Sub: { |
218 | if (O.shouldKeep()) |
219 | return nullptr; |
220 | |
221 | // Add/sub are more likely codegen to instructions with carry out side |
222 | // effects. |
223 | return B.CreateOr(LHS: I.getOperand(i: 0), RHS: I.getOperand(i: 1)); |
224 | } |
225 | case Instruction::Call: { |
226 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &I)) |
227 | return reduceIntrinsic(O, M, II); |
228 | |
229 | CallBase *CB = cast<CallBase>(Val: &I); |
230 | |
231 | if (Value *NewOp = tryReplaceCallWithOperator(O, M, CB)) |
232 | return NewOp; |
233 | |
234 | if (Value *NewOp = tryReplaceCallWithLoadStore(O, M, CB)) |
235 | return NewOp; |
236 | |
237 | return nullptr; |
238 | } |
239 | default: |
240 | return nullptr; |
241 | } |
242 | |
243 | return nullptr; |
244 | } |
245 | |
246 | static void replaceOpcodesInModule(Oracle &O, ReducerWorkItem &WorkItem) { |
247 | Module &Mod = WorkItem.getModule(); |
248 | |
249 | for (Function &F : Mod) { |
250 | for (BasicBlock &BB : F) |
251 | for (Instruction &I : make_early_inc_range(Range&: BB)) { |
252 | Instruction *Replacement = |
253 | dyn_cast_or_null<Instruction>(Val: reduceInstruction(O, M&: Mod, I)); |
254 | if (Replacement && Replacement != &I) { |
255 | if (isa<FPMathOperator>(Val: Replacement)) |
256 | Replacement->copyFastMathFlags(I: &I); |
257 | |
258 | Replacement->copyIRFlags(V: &I); |
259 | Replacement->copyMetadata(SrcInst: I); |
260 | Replacement->takeName(V: &I); |
261 | I.replaceAllUsesWith(V: Replacement); |
262 | I.eraseFromParent(); |
263 | } |
264 | } |
265 | } |
266 | } |
267 | |
268 | void llvm::reduceOpcodesDeltaPass(TestRunner &Test) { |
269 | runDeltaPass(Test, ExtractChunksFromModule: replaceOpcodesInModule, Message: "Reducing Opcodes" ); |
270 | } |
271 | |