1 | //===-- Operations.cpp ----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/FuzzMutate/Operations.h" |
10 | #include "llvm/IR/BasicBlock.h" |
11 | #include "llvm/IR/Constants.h" |
12 | #include "llvm/IR/Function.h" |
13 | #include "llvm/IR/Instructions.h" |
14 | |
15 | using namespace llvm; |
16 | using namespace fuzzerop; |
17 | |
18 | void llvm::describeFuzzerIntOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
19 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Add)); |
20 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Sub)); |
21 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Mul)); |
22 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::SDiv)); |
23 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::UDiv)); |
24 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::SRem)); |
25 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::URem)); |
26 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Shl)); |
27 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::LShr)); |
28 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::AShr)); |
29 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::And)); |
30 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Or)); |
31 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Xor)); |
32 | |
33 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_EQ)); |
34 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_NE)); |
35 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_UGT)); |
36 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_UGE)); |
37 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_ULT)); |
38 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_ULE)); |
39 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_SGT)); |
40 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_SGE)); |
41 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_SLT)); |
42 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_SLE)); |
43 | } |
44 | |
45 | void llvm::describeFuzzerFloatOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
46 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FAdd)); |
47 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FSub)); |
48 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FMul)); |
49 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FDiv)); |
50 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FRem)); |
51 | |
52 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_FALSE)); |
53 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OEQ)); |
54 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OGT)); |
55 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OGE)); |
56 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OLT)); |
57 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OLE)); |
58 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_ONE)); |
59 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_ORD)); |
60 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UNO)); |
61 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UEQ)); |
62 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UGT)); |
63 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UGE)); |
64 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_ULT)); |
65 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_ULE)); |
66 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UNE)); |
67 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_TRUE)); |
68 | } |
69 | |
70 | void llvm::describeFuzzerUnaryOperations( |
71 | std::vector<fuzzerop::OpDescriptor> &Ops) { |
72 | Ops.push_back(x: fnegDescriptor(Weight: 1)); |
73 | } |
74 | |
75 | void llvm::describeFuzzerControlFlowOps( |
76 | std::vector<fuzzerop::OpDescriptor> &Ops) { |
77 | Ops.push_back(x: splitBlockDescriptor(Weight: 1)); |
78 | } |
79 | |
80 | void llvm::describeFuzzerOtherOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
81 | Ops.push_back(x: selectDescriptor(Weight: 1)); |
82 | } |
83 | |
84 | void llvm::describeFuzzerPointerOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
85 | Ops.push_back(x: gepDescriptor(Weight: 1)); |
86 | } |
87 | |
88 | void llvm::describeFuzzerAggregateOps( |
89 | std::vector<fuzzerop::OpDescriptor> &Ops) { |
90 | Ops.push_back(x: extractValueDescriptor(Weight: 1)); |
91 | Ops.push_back(x: insertValueDescriptor(Weight: 1)); |
92 | } |
93 | |
94 | void llvm::describeFuzzerVectorOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
95 | Ops.push_back(x: extractElementDescriptor(Weight: 1)); |
96 | Ops.push_back(x: insertElementDescriptor(Weight: 1)); |
97 | Ops.push_back(x: shuffleVectorDescriptor(Weight: 1)); |
98 | } |
99 | |
100 | OpDescriptor llvm::fuzzerop::selectDescriptor(unsigned Weight) { |
101 | auto buildOp = [](ArrayRef<Value *> Srcs, Instruction *Inst) { |
102 | return SelectInst::Create(C: Srcs[0], S1: Srcs[1], S2: Srcs[2], NameStr: "S" , InsertBefore: Inst); |
103 | }; |
104 | return {.Weight: Weight, |
105 | .SourcePreds: {boolOrVecBoolType(), matchFirstLengthWAnyType(), matchSecondType()}, |
106 | .BuilderFunc: buildOp}; |
107 | } |
108 | |
109 | OpDescriptor llvm::fuzzerop::fnegDescriptor(unsigned Weight) { |
110 | auto buildOp = [](ArrayRef<Value *> Srcs, Instruction *Inst) { |
111 | return UnaryOperator::Create(Op: Instruction::FNeg, S: Srcs[0], Name: "F" , InsertBefore: Inst); |
112 | }; |
113 | return {.Weight: Weight, .SourcePreds: {anyFloatOrVecFloatType()}, .BuilderFunc: buildOp}; |
114 | } |
115 | |
116 | OpDescriptor llvm::fuzzerop::binOpDescriptor(unsigned Weight, |
117 | Instruction::BinaryOps Op) { |
118 | auto buildOp = [Op](ArrayRef<Value *> Srcs, Instruction *Inst) { |
119 | return BinaryOperator::Create(Op, S1: Srcs[0], S2: Srcs[1], Name: "B" , InsertBefore: Inst); |
120 | }; |
121 | switch (Op) { |
122 | case Instruction::Add: |
123 | case Instruction::Sub: |
124 | case Instruction::Mul: |
125 | case Instruction::SDiv: |
126 | case Instruction::UDiv: |
127 | case Instruction::SRem: |
128 | case Instruction::URem: |
129 | case Instruction::Shl: |
130 | case Instruction::LShr: |
131 | case Instruction::AShr: |
132 | case Instruction::And: |
133 | case Instruction::Or: |
134 | case Instruction::Xor: |
135 | return {.Weight: Weight, .SourcePreds: {anyIntOrVecIntType(), matchFirstType()}, .BuilderFunc: buildOp}; |
136 | case Instruction::FAdd: |
137 | case Instruction::FSub: |
138 | case Instruction::FMul: |
139 | case Instruction::FDiv: |
140 | case Instruction::FRem: |
141 | return {.Weight: Weight, .SourcePreds: {anyFloatOrVecFloatType(), matchFirstType()}, .BuilderFunc: buildOp}; |
142 | case Instruction::BinaryOpsEnd: |
143 | llvm_unreachable("Value out of range of enum" ); |
144 | } |
145 | llvm_unreachable("Covered switch" ); |
146 | } |
147 | |
148 | OpDescriptor llvm::fuzzerop::cmpOpDescriptor(unsigned Weight, |
149 | Instruction::OtherOps CmpOp, |
150 | CmpInst::Predicate Pred) { |
151 | auto buildOp = [CmpOp, Pred](ArrayRef<Value *> Srcs, Instruction *Inst) { |
152 | return CmpInst::Create(Op: CmpOp, Pred, S1: Srcs[0], S2: Srcs[1], Name: "C" , InsertBefore: Inst); |
153 | }; |
154 | |
155 | switch (CmpOp) { |
156 | case Instruction::ICmp: |
157 | return {.Weight: Weight, .SourcePreds: {anyIntOrVecIntType(), matchFirstType()}, .BuilderFunc: buildOp}; |
158 | case Instruction::FCmp: |
159 | return {.Weight: Weight, .SourcePreds: {anyFloatOrVecFloatType(), matchFirstType()}, .BuilderFunc: buildOp}; |
160 | default: |
161 | llvm_unreachable("CmpOp must be ICmp or FCmp" ); |
162 | } |
163 | } |
164 | |
165 | OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) { |
166 | auto buildSplitBlock = [](ArrayRef<Value *> Srcs, Instruction *Inst) { |
167 | BasicBlock *Block = Inst->getParent(); |
168 | BasicBlock *Next = Block->splitBasicBlock(I: Inst, BBName: "BB" ); |
169 | |
170 | // If it was an exception handling block, we are done. |
171 | if (Block->isEHPad()) |
172 | return nullptr; |
173 | |
174 | // Loop back on this block by replacing the unconditional forward branch |
175 | // with a conditional with a backedge. |
176 | if (Block != &Block->getParent()->getEntryBlock()) { |
177 | BranchInst::Create(IfTrue: Block, IfFalse: Next, Cond: Srcs[0], InsertBefore: Block->getTerminator()); |
178 | Block->getTerminator()->eraseFromParent(); |
179 | |
180 | // We need values for each phi in the block. Since there isn't a good way |
181 | // to do a variable number of input values currently, we just fill them |
182 | // with undef. |
183 | for (PHINode &PHI : Block->phis()) |
184 | PHI.addIncoming(V: UndefValue::get(T: PHI.getType()), BB: Block); |
185 | } |
186 | return nullptr; |
187 | }; |
188 | SourcePred isInt1Ty{[](ArrayRef<Value *>, const Value *V) { |
189 | return V->getType()->isIntegerTy(Bitwidth: 1); |
190 | }, |
191 | std::nullopt}; |
192 | return {.Weight: Weight, .SourcePreds: {isInt1Ty}, .BuilderFunc: buildSplitBlock}; |
193 | } |
194 | |
195 | OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) { |
196 | auto buildGEP = [](ArrayRef<Value *> Srcs, Instruction *Inst) { |
197 | // TODO: It would be better to generate a random type here, rather than |
198 | // generating a random value and picking its type. |
199 | Type *Ty = Srcs[1]->getType(); |
200 | auto Indices = ArrayRef(Srcs).drop_front(N: 2); |
201 | return GetElementPtrInst::Create(PointeeType: Ty, Ptr: Srcs[0], IdxList: Indices, NameStr: "G" , InsertBefore: Inst); |
202 | }; |
203 | // TODO: Handle aggregates and vectors |
204 | // TODO: Support multiple indices. |
205 | // TODO: Try to avoid meaningless accesses. |
206 | SourcePred sizedType( |
207 | [](ArrayRef<Value *>, const Value *V) { return V->getType()->isSized(); }, |
208 | std::nullopt); |
209 | return {.Weight: Weight, .SourcePreds: {sizedPtrType(), sizedType, anyIntType()}, .BuilderFunc: buildGEP}; |
210 | } |
211 | |
212 | static uint64_t getAggregateNumElements(Type *T) { |
213 | assert(T->isAggregateType() && "Not a struct or array" ); |
214 | if (isa<StructType>(Val: T)) |
215 | return T->getStructNumElements(); |
216 | return T->getArrayNumElements(); |
217 | } |
218 | |
219 | static SourcePred () { |
220 | auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { |
221 | if (auto *CI = dyn_cast<ConstantInt>(Val: V)) |
222 | if (!CI->uge(Num: getAggregateNumElements(T: Cur[0]->getType()))) |
223 | return true; |
224 | return false; |
225 | }; |
226 | auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) { |
227 | std::vector<Constant *> Result; |
228 | auto *Int32Ty = Type::getInt32Ty(C&: Cur[0]->getContext()); |
229 | uint64_t N = getAggregateNumElements(T: Cur[0]->getType()); |
230 | // Create indices at the start, end, and middle, but avoid dups. |
231 | Result.push_back(x: ConstantInt::get(Ty: Int32Ty, V: 0)); |
232 | if (N > 1) |
233 | Result.push_back(x: ConstantInt::get(Ty: Int32Ty, V: N - 1)); |
234 | if (N > 2) |
235 | Result.push_back(x: ConstantInt::get(Ty: Int32Ty, V: N / 2)); |
236 | return Result; |
237 | }; |
238 | return {Pred, Make}; |
239 | } |
240 | |
241 | OpDescriptor llvm::fuzzerop::(unsigned Weight) { |
242 | auto = [](ArrayRef<Value *> Srcs, Instruction *Inst) { |
243 | // TODO: It's pretty inefficient to shuffle this all through constants. |
244 | unsigned Idx = cast<ConstantInt>(Val: Srcs[1])->getZExtValue(); |
245 | return ExtractValueInst::Create(Agg: Srcs[0], Idxs: {Idx}, NameStr: "E" , InsertBefore: Inst); |
246 | }; |
247 | // TODO: Should we handle multiple indices? |
248 | return {.Weight: Weight, .SourcePreds: {anyAggregateType(), validExtractValueIndex()}, .BuilderFunc: buildExtract}; |
249 | } |
250 | |
251 | static SourcePred matchScalarInAggregate() { |
252 | auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { |
253 | if (auto *ArrayT = dyn_cast<ArrayType>(Val: Cur[0]->getType())) |
254 | return V->getType() == ArrayT->getElementType(); |
255 | |
256 | auto *STy = cast<StructType>(Val: Cur[0]->getType()); |
257 | for (int I = 0, E = STy->getNumElements(); I < E; ++I) |
258 | if (STy->getTypeAtIndex(N: I) == V->getType()) |
259 | return true; |
260 | return false; |
261 | }; |
262 | auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *>) { |
263 | if (auto *ArrayT = dyn_cast<ArrayType>(Val: Cur[0]->getType())) |
264 | return makeConstantsWithType(T: ArrayT->getElementType()); |
265 | |
266 | std::vector<Constant *> Result; |
267 | auto *STy = cast<StructType>(Val: Cur[0]->getType()); |
268 | for (int I = 0, E = STy->getNumElements(); I < E; ++I) |
269 | makeConstantsWithType(T: STy->getTypeAtIndex(N: I), Cs&: Result); |
270 | return Result; |
271 | }; |
272 | return {Pred, Make}; |
273 | } |
274 | |
275 | static SourcePred validInsertValueIndex() { |
276 | auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { |
277 | if (auto *CI = dyn_cast<ConstantInt>(Val: V)) |
278 | if (CI->getBitWidth() == 32) { |
279 | Type *Indexed = ExtractValueInst::getIndexedType(Agg: Cur[0]->getType(), |
280 | Idxs: CI->getZExtValue()); |
281 | return Indexed == Cur[1]->getType(); |
282 | } |
283 | return false; |
284 | }; |
285 | auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) { |
286 | std::vector<Constant *> Result; |
287 | auto *Int32Ty = Type::getInt32Ty(C&: Cur[0]->getContext()); |
288 | auto *BaseTy = Cur[0]->getType(); |
289 | int I = 0; |
290 | while (Type *Indexed = ExtractValueInst::getIndexedType(Agg: BaseTy, Idxs: I)) { |
291 | if (Indexed == Cur[1]->getType()) |
292 | Result.push_back(x: ConstantInt::get(Ty: Int32Ty, V: I)); |
293 | ++I; |
294 | } |
295 | return Result; |
296 | }; |
297 | return {Pred, Make}; |
298 | } |
299 | |
300 | OpDescriptor llvm::fuzzerop::insertValueDescriptor(unsigned Weight) { |
301 | auto buildInsert = [](ArrayRef<Value *> Srcs, Instruction *Inst) { |
302 | // TODO: It's pretty inefficient to shuffle this all through constants. |
303 | unsigned Idx = cast<ConstantInt>(Val: Srcs[2])->getZExtValue(); |
304 | return InsertValueInst::Create(Agg: Srcs[0], Val: Srcs[1], Idxs: {Idx}, NameStr: "I" , InsertBefore: Inst); |
305 | }; |
306 | return { |
307 | .Weight: Weight, |
308 | .SourcePreds: {anyAggregateType(), matchScalarInAggregate(), validInsertValueIndex()}, |
309 | .BuilderFunc: buildInsert}; |
310 | } |
311 | |
312 | OpDescriptor llvm::fuzzerop::(unsigned Weight) { |
313 | auto = [](ArrayRef<Value *> Srcs, Instruction *Inst) { |
314 | return ExtractElementInst::Create(Vec: Srcs[0], Idx: Srcs[1], NameStr: "E" , InsertBefore: Inst); |
315 | }; |
316 | // TODO: Try to avoid undefined accesses. |
317 | return {.Weight: Weight, .SourcePreds: {anyVectorType(), anyIntType()}, .BuilderFunc: buildExtract}; |
318 | } |
319 | |
320 | OpDescriptor llvm::fuzzerop::insertElementDescriptor(unsigned Weight) { |
321 | auto buildInsert = [](ArrayRef<Value *> Srcs, Instruction *Inst) { |
322 | return InsertElementInst::Create(Vec: Srcs[0], NewElt: Srcs[1], Idx: Srcs[2], NameStr: "I" , InsertBefore: Inst); |
323 | }; |
324 | // TODO: Try to avoid undefined accesses. |
325 | return {.Weight: Weight, |
326 | .SourcePreds: {anyVectorType(), matchScalarOfFirstType(), anyIntType()}, |
327 | .BuilderFunc: buildInsert}; |
328 | } |
329 | |
330 | static SourcePred validShuffleVectorIndex() { |
331 | auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { |
332 | return ShuffleVectorInst::isValidOperands(V1: Cur[0], V2: Cur[1], Mask: V); |
333 | }; |
334 | auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) { |
335 | auto *FirstTy = cast<VectorType>(Val: Cur[0]->getType()); |
336 | auto *Int32Ty = Type::getInt32Ty(C&: Cur[0]->getContext()); |
337 | // TODO: It's straighforward to make up reasonable values, but listing them |
338 | // exhaustively would be insane. Come up with a couple of sensible ones. |
339 | return std::vector<Constant *>{ |
340 | UndefValue::get(T: VectorType::get(ElementType: Int32Ty, EC: FirstTy->getElementCount()))}; |
341 | }; |
342 | return {Pred, Make}; |
343 | } |
344 | |
345 | OpDescriptor llvm::fuzzerop::shuffleVectorDescriptor(unsigned Weight) { |
346 | auto buildShuffle = [](ArrayRef<Value *> Srcs, Instruction *Inst) { |
347 | return new ShuffleVectorInst(Srcs[0], Srcs[1], Srcs[2], "S" , Inst); |
348 | }; |
349 | return {.Weight: Weight, |
350 | .SourcePreds: {anyVectorType(), matchFirstType(), validShuffleVectorIndex()}, |
351 | .BuilderFunc: buildShuffle}; |
352 | } |
353 | |