//===- ReduceOpcodes.cpp - Specialized Delta Pass -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Try to replace instructions that are likely to codegen to simpler or smaller
// sequences. This is a fuzzy and target-specific concept.
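// For example, an llvm.sqrt call may be rewritten to a plain fmul, and an
// integer division to a multiply.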
//
//===----------------------------------------------------------------------===//

#include "ReduceOpcodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

using namespace llvm;

// Assume outgoing undef arguments aren't relevant.
// TODO: Maybe skip any trivial constant arguments.
static bool shouldIgnoreArgument(const Value *V) {
  return isa<UndefValue>(V);
}

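// Rewrite the call to use a different intrinsic with the same operands,
// keeping the original call instruction in place.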
static Value *replaceIntrinsic(Module &M, IntrinsicInst *II,
                               Intrinsic::ID NewIID,
                               ArrayRef<Type *> Tys = {}) {
  Function *NewFunc = Intrinsic::getOrInsertDeclaration(&M, NewIID, Tys);
  II->setCalledFunction(NewFunc);
  return II;
}

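// Replace recognized intrinsic calls with cheaper equivalents (an fmul or a
// simpler target intrinsic) when the oracle decides not to keep them.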
static Value *reduceIntrinsic(Oracle &O, Module &M, IntrinsicInst *II) {
  IRBuilder<> B(II);
  switch (II->getIntrinsicID()) {
  case Intrinsic::sqrt:
    if (O.shouldKeep())
      return nullptr;

    return B.CreateFMul(II->getArgOperand(0),
                        ConstantFP::get(II->getType(), 2.0));
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::amdgcn_fmul_legacy:
    if (O.shouldKeep())
      return nullptr;
    return B.CreateFMul(II->getArgOperand(0), II->getArgOperand(1));
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::amdgcn_workitem_id_z:
    if (O.shouldKeep())
      return nullptr;
    return replaceIntrinsic(M, II, Intrinsic::amdgcn_workitem_id_x);
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::amdgcn_workgroup_id_z:
    if (O.shouldKeep())
      return nullptr;
    return replaceIntrinsic(M, II, Intrinsic::amdgcn_workgroup_id_x);
  case Intrinsic::amdgcn_div_fixup:
  case Intrinsic::amdgcn_fma_legacy:
    if (O.shouldKeep())
      return nullptr;
    return replaceIntrinsic(M, II, Intrinsic::fma, {II->getType()});
  default:
    return nullptr;
  }
}

/// Look for calls that look like they could be replaced with a load or store.
static bool callLooksLikeLoadStore(CallBase *CB, Value *&DataArg,
                                   Value *&PtrArg) {
  const bool IsStore = CB->getType()->isVoidTy();

  PtrArg = nullptr;
  DataArg = nullptr;
  for (Value *Arg : CB->args()) {
    if (shouldIgnoreArgument(Arg))
      continue;

    if (!Arg->getType()->isSized())
      return false;

    if (!PtrArg && Arg->getType()->isPointerTy()) {
      PtrArg = Arg;
      continue;
    }

    if (!IsStore || DataArg)
      return false;

    DataArg = Arg;
  }

  if (IsStore && !DataArg) {
    // FIXME: For typed pointers, use element type?
    DataArg = ConstantInt::get(IntegerType::getInt32Ty(CB->getContext()), 0);
  }

  // If we didn't find a pointer argument, fill in a null pointer in the
  // alloca address space.
  if (!PtrArg) {
    unsigned AS = CB->getDataLayout().getAllocaAddrSpace();

    PointerType *PtrTy = PointerType::get(CB->getContext(), AS);

    PtrArg = ConstantPointerNull::get(PtrTy);
  }

  return true;
}

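// If the call has a load/store-like shape, replace it with a volatile load or
// store through the pointer argument (or a null pointer if none was found).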
// TODO: Replace 2 pointer argument calls with memcpy
static Value *tryReplaceCallWithLoadStore(Oracle &O, Module &M, CallBase *CB) {
  Value *PtrArg = nullptr;
  Value *DataArg = nullptr;
  if (!callLooksLikeLoadStore(CB, DataArg, PtrArg) || O.shouldKeep())
    return nullptr;

  IRBuilder<> B(CB);
  if (DataArg)
    return B.CreateStore(DataArg, PtrArg, true);
  return B.CreateLoad(CB->getType(), PtrArg, true);
}

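// Check whether every (non-ignored) argument has the same type as the call's
// return value, collecting those arguments so the call can be replaced by an
// ordinary operator.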
static bool callLooksLikeOperator(CallBase *CB,
                                  SmallVectorImpl<Value *> &OperatorArgs) {
  Type *ReturnTy = CB->getType();
  if (!ReturnTy->isFirstClassType())
    return false;

  for (Value *Arg : CB->args()) {
    if (shouldIgnoreArgument(Arg))
      continue;

    if (Arg->getType() != ReturnTy)
      return false;

    OperatorArgs.push_back(Arg);
  }

  return true;
}

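// Replace an operator-like call with a plain FP or integer operation (or an
// fma/fshl intrinsic for the three-operand case) built from its arguments.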
static Value *tryReplaceCallWithOperator(Oracle &O, Module &M, CallBase *CB) {
  SmallVector<Value *, 4> Arguments;

  if (!callLooksLikeOperator(CB, Arguments) || Arguments.size() > 3)
    return nullptr;

  if (O.shouldKeep())
    return nullptr;

  IRBuilder<> B(CB);
  if (CB->getType()->isFPOrFPVectorTy()) {
    switch (Arguments.size()) {
    case 1:
      return B.CreateFNeg(Arguments[0]);
    case 2:
      return B.CreateFMul(Arguments[0], Arguments[1]);
    case 3:
      return B.CreateIntrinsic(Intrinsic::fma, {CB->getType()}, Arguments);
    default:
      return nullptr;
    }

    llvm_unreachable("all argument sizes handled");
  }

  if (CB->getType()->isIntOrIntVectorTy()) {
    switch (Arguments.size()) {
    case 1:
      return B.CreateUnaryIntrinsic(Intrinsic::bswap, Arguments[0]);
    case 2:
      return B.CreateAnd(Arguments[0], Arguments[1]);
    case 3:
      return B.CreateIntrinsic(Intrinsic::fshl, {CB->getType()}, Arguments);
    default:
      return nullptr;
    }

    llvm_unreachable("all argument sizes handled");
  }

  return nullptr;
}

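// Return a cheaper replacement for the instruction, or null if the opcode is
// not handled or the oracle chose to keep it.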
static Value *reduceInstruction(Oracle &O, Module &M, Instruction &I) {
  IRBuilder<> B(&I);

  // TODO: fp binary operator with constant to fneg
  switch (I.getOpcode()) {
  case Instruction::FDiv:
  case Instruction::FRem:
    if (O.shouldKeep())
      return nullptr;

    // Division tends to codegen into a long sequence or a library call.
    return B.CreateFMul(I.getOperand(0), I.getOperand(1));
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    if (O.shouldKeep())
      return nullptr;

    // Division tends to codegen into a long sequence or a library call.
    return B.CreateMul(I.getOperand(0), I.getOperand(1));
  case Instruction::Add:
  case Instruction::Sub: {
    if (O.shouldKeep())
      return nullptr;

    // Add/sub are more likely to codegen to instructions with carry-out side
    // effects.
    return B.CreateOr(I.getOperand(0), I.getOperand(1));
  }
  case Instruction::Call: {
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
      return reduceIntrinsic(O, M, II);

    CallBase *CB = cast<CallBase>(&I);

    if (Value *NewOp = tryReplaceCallWithOperator(O, M, CB))
      return NewOp;

    if (Value *NewOp = tryReplaceCallWithLoadStore(O, M, CB))
      return NewOp;

    return nullptr;
  }
  default:
    return nullptr;
  }

  return nullptr;
}

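// Walk every instruction in the module and, where the oracle allows, swap it
// for the simpler form chosen by reduceInstruction, preserving flags, metadata
// and the original name.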
void llvm::reduceOpcodesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) {
  Module &Mod = WorkItem.getModule();

  for (Function &F : Mod) {
    for (BasicBlock &BB : F)
      for (Instruction &I : make_early_inc_range(BB)) {
        Instruction *Replacement =
            dyn_cast_or_null<Instruction>(reduceInstruction(O, Mod, I));
        if (Replacement && Replacement != &I) {
          if (isa<FPMathOperator>(Replacement))
            Replacement->copyFastMathFlags(&I);

          Replacement->copyIRFlags(&I);
          Replacement->copyMetadata(I);
          Replacement->takeName(&I);
          I.replaceAllUsesWith(Replacement);
          I.eraseFromParent();
        }
      }
  }
}
| 263 | |