1 | //===-- Operations.cpp ----------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/FuzzMutate/Operations.h" |
10 | #include "llvm/IR/BasicBlock.h" |
11 | #include "llvm/IR/Constants.h" |
12 | #include "llvm/IR/Function.h" |
13 | #include "llvm/IR/Instructions.h" |
14 | |
15 | using namespace llvm; |
16 | using namespace fuzzerop; |
17 | |
18 | void llvm::describeFuzzerIntOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
19 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Add)); |
20 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Sub)); |
21 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Mul)); |
22 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::SDiv)); |
23 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::UDiv)); |
24 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::SRem)); |
25 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::URem)); |
26 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Shl)); |
27 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::LShr)); |
28 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::AShr)); |
29 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::And)); |
30 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Or)); |
31 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::Xor)); |
32 | |
33 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_EQ)); |
34 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_NE)); |
35 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_UGT)); |
36 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_UGE)); |
37 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_ULT)); |
38 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_ULE)); |
39 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_SGT)); |
40 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_SGE)); |
41 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_SLT)); |
42 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::ICmp, Pred: CmpInst::ICMP_SLE)); |
43 | } |
44 | |
45 | void llvm::describeFuzzerFloatOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
46 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FAdd)); |
47 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FSub)); |
48 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FMul)); |
49 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FDiv)); |
50 | Ops.push_back(x: binOpDescriptor(Weight: 1, Op: Instruction::FRem)); |
51 | |
52 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_FALSE)); |
53 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OEQ)); |
54 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OGT)); |
55 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OGE)); |
56 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OLT)); |
57 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_OLE)); |
58 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_ONE)); |
59 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_ORD)); |
60 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UNO)); |
61 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UEQ)); |
62 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UGT)); |
63 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UGE)); |
64 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_ULT)); |
65 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_ULE)); |
66 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_UNE)); |
67 | Ops.push_back(x: cmpOpDescriptor(Weight: 1, CmpOp: Instruction::FCmp, Pred: CmpInst::FCMP_TRUE)); |
68 | } |
69 | |
70 | void llvm::describeFuzzerUnaryOperations( |
71 | std::vector<fuzzerop::OpDescriptor> &Ops) { |
72 | Ops.push_back(x: fnegDescriptor(Weight: 1)); |
73 | } |
74 | |
75 | void llvm::describeFuzzerControlFlowOps( |
76 | std::vector<fuzzerop::OpDescriptor> &Ops) { |
77 | Ops.push_back(x: splitBlockDescriptor(Weight: 1)); |
78 | } |
79 | |
80 | void llvm::describeFuzzerOtherOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
81 | Ops.push_back(x: selectDescriptor(Weight: 1)); |
82 | } |
83 | |
84 | void llvm::describeFuzzerPointerOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
85 | Ops.push_back(x: gepDescriptor(Weight: 1)); |
86 | } |
87 | |
88 | void llvm::describeFuzzerAggregateOps( |
89 | std::vector<fuzzerop::OpDescriptor> &Ops) { |
90 | Ops.push_back(x: extractValueDescriptor(Weight: 1)); |
91 | Ops.push_back(x: insertValueDescriptor(Weight: 1)); |
92 | } |
93 | |
94 | void llvm::describeFuzzerVectorOps(std::vector<fuzzerop::OpDescriptor> &Ops) { |
95 | Ops.push_back(x: extractElementDescriptor(Weight: 1)); |
96 | Ops.push_back(x: insertElementDescriptor(Weight: 1)); |
97 | Ops.push_back(x: shuffleVectorDescriptor(Weight: 1)); |
98 | } |
99 | |
100 | OpDescriptor llvm::fuzzerop::selectDescriptor(unsigned Weight) { |
101 | auto buildOp = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) { |
102 | return SelectInst::Create(C: Srcs[0], S1: Srcs[1], S2: Srcs[2], NameStr: "S" , InsertBefore: InsertPt); |
103 | }; |
104 | return {.Weight: Weight, |
105 | .SourcePreds: {boolOrVecBoolType(), matchFirstLengthWAnyType(), matchSecondType()}, |
106 | .BuilderFunc: buildOp}; |
107 | } |
108 | |
109 | OpDescriptor llvm::fuzzerop::fnegDescriptor(unsigned Weight) { |
110 | auto buildOp = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) { |
111 | return UnaryOperator::Create(Op: Instruction::FNeg, S: Srcs[0], Name: "F" , InsertBefore: InsertPt); |
112 | }; |
113 | return {.Weight: Weight, .SourcePreds: {anyFloatOrVecFloatType()}, .BuilderFunc: buildOp}; |
114 | } |
115 | |
116 | OpDescriptor llvm::fuzzerop::binOpDescriptor(unsigned Weight, |
117 | Instruction::BinaryOps Op) { |
118 | auto buildOp = [Op](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) { |
119 | return BinaryOperator::Create(Op, S1: Srcs[0], S2: Srcs[1], Name: "B" , InsertBefore: InsertPt); |
120 | }; |
121 | switch (Op) { |
122 | case Instruction::Add: |
123 | case Instruction::Sub: |
124 | case Instruction::Mul: |
125 | case Instruction::SDiv: |
126 | case Instruction::UDiv: |
127 | case Instruction::SRem: |
128 | case Instruction::URem: |
129 | case Instruction::Shl: |
130 | case Instruction::LShr: |
131 | case Instruction::AShr: |
132 | case Instruction::And: |
133 | case Instruction::Or: |
134 | case Instruction::Xor: |
135 | return {.Weight: Weight, .SourcePreds: {anyIntOrVecIntType(), matchFirstType()}, .BuilderFunc: buildOp}; |
136 | case Instruction::FAdd: |
137 | case Instruction::FSub: |
138 | case Instruction::FMul: |
139 | case Instruction::FDiv: |
140 | case Instruction::FRem: |
141 | return {.Weight: Weight, .SourcePreds: {anyFloatOrVecFloatType(), matchFirstType()}, .BuilderFunc: buildOp}; |
142 | case Instruction::BinaryOpsEnd: |
143 | llvm_unreachable("Value out of range of enum" ); |
144 | } |
145 | llvm_unreachable("Covered switch" ); |
146 | } |
147 | |
148 | OpDescriptor llvm::fuzzerop::cmpOpDescriptor(unsigned Weight, |
149 | Instruction::OtherOps CmpOp, |
150 | CmpInst::Predicate Pred) { |
151 | auto buildOp = [CmpOp, Pred](ArrayRef<Value *> Srcs, |
152 | BasicBlock::iterator InsertPt) { |
153 | return CmpInst::Create(Op: CmpOp, Pred, S1: Srcs[0], S2: Srcs[1], Name: "C" , InsertBefore: InsertPt); |
154 | }; |
155 | |
156 | switch (CmpOp) { |
157 | case Instruction::ICmp: |
158 | return {.Weight: Weight, .SourcePreds: {anyIntOrVecIntType(), matchFirstType()}, .BuilderFunc: buildOp}; |
159 | case Instruction::FCmp: |
160 | return {.Weight: Weight, .SourcePreds: {anyFloatOrVecFloatType(), matchFirstType()}, .BuilderFunc: buildOp}; |
161 | default: |
162 | llvm_unreachable("CmpOp must be ICmp or FCmp" ); |
163 | } |
164 | } |
165 | |
166 | OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) { |
167 | auto buildSplitBlock = [](ArrayRef<Value *> Srcs, |
168 | BasicBlock::iterator InsertPt) { |
169 | BasicBlock *Block = InsertPt->getParent(); |
170 | BasicBlock *Next = Block->splitBasicBlock(I: InsertPt, BBName: "BB" ); |
171 | |
172 | // If it was an exception handling block, we are done. |
173 | if (Block->isEHPad()) |
174 | return nullptr; |
175 | |
176 | // Loop back on this block by replacing the unconditional forward branch |
177 | // with a conditional with a backedge. |
178 | if (Block != &Block->getParent()->getEntryBlock()) { |
179 | BranchInst::Create(IfTrue: Block, IfFalse: Next, Cond: Srcs[0], |
180 | InsertBefore: Block->getTerminator()->getIterator()); |
181 | Block->getTerminator()->eraseFromParent(); |
182 | |
183 | // We need values for each phi in the block. Since there isn't a good way |
184 | // to do a variable number of input values currently, we just fill them |
185 | // with poison. |
186 | for (PHINode &PHI : Block->phis()) |
187 | PHI.addIncoming(V: PoisonValue::get(T: PHI.getType()), BB: Block); |
188 | } |
189 | return nullptr; |
190 | }; |
191 | SourcePred isInt1Ty{[](ArrayRef<Value *>, const Value *V) { |
192 | return V->getType()->isIntegerTy(Bitwidth: 1); |
193 | }, |
194 | std::nullopt}; |
195 | return {.Weight: Weight, .SourcePreds: {isInt1Ty}, .BuilderFunc: buildSplitBlock}; |
196 | } |
197 | |
198 | OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) { |
199 | auto buildGEP = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) { |
200 | // TODO: It would be better to generate a random type here, rather than |
201 | // generating a random value and picking its type. |
202 | Type *Ty = Srcs[1]->getType(); |
203 | auto Indices = ArrayRef(Srcs).drop_front(N: 2); |
204 | return GetElementPtrInst::Create(PointeeType: Ty, Ptr: Srcs[0], IdxList: Indices, NameStr: "G" , InsertBefore: InsertPt); |
205 | }; |
206 | // TODO: Handle aggregates and vectors |
207 | // TODO: Support multiple indices. |
208 | // TODO: Try to avoid meaningless accesses. |
209 | SourcePred sizedType( |
210 | [](ArrayRef<Value *>, const Value *V) { return V->getType()->isSized(); }, |
211 | std::nullopt); |
212 | return {.Weight: Weight, .SourcePreds: {sizedPtrType(), sizedType, anyIntType()}, .BuilderFunc: buildGEP}; |
213 | } |
214 | |
215 | static uint64_t getAggregateNumElements(Type *T) { |
216 | assert(T->isAggregateType() && "Not a struct or array" ); |
217 | if (isa<StructType>(Val: T)) |
218 | return T->getStructNumElements(); |
219 | return T->getArrayNumElements(); |
220 | } |
221 | |
222 | static SourcePred () { |
223 | auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { |
224 | if (auto *CI = dyn_cast<ConstantInt>(Val: V)) |
225 | if (!CI->uge(Num: getAggregateNumElements(T: Cur[0]->getType()))) |
226 | return true; |
227 | return false; |
228 | }; |
229 | auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) { |
230 | std::vector<Constant *> Result; |
231 | auto *Int32Ty = Type::getInt32Ty(C&: Cur[0]->getContext()); |
232 | uint64_t N = getAggregateNumElements(T: Cur[0]->getType()); |
233 | // Create indices at the start, end, and middle, but avoid dups. |
234 | Result.push_back(x: ConstantInt::get(Ty: Int32Ty, V: 0)); |
235 | if (N > 1) |
236 | Result.push_back(x: ConstantInt::get(Ty: Int32Ty, V: N - 1)); |
237 | if (N > 2) |
238 | Result.push_back(x: ConstantInt::get(Ty: Int32Ty, V: N / 2)); |
239 | return Result; |
240 | }; |
241 | return {Pred, Make}; |
242 | } |
243 | |
244 | OpDescriptor llvm::fuzzerop::(unsigned Weight) { |
245 | auto = [](ArrayRef<Value *> Srcs, |
246 | BasicBlock::iterator InsertPt) { |
247 | // TODO: It's pretty inefficient to shuffle this all through constants. |
248 | unsigned Idx = cast<ConstantInt>(Val: Srcs[1])->getZExtValue(); |
249 | return ExtractValueInst::Create(Agg: Srcs[0], Idxs: {Idx}, NameStr: "E" , InsertBefore: InsertPt); |
250 | }; |
251 | // TODO: Should we handle multiple indices? |
252 | return {.Weight: Weight, .SourcePreds: {anyAggregateType(), validExtractValueIndex()}, .BuilderFunc: buildExtract}; |
253 | } |
254 | |
255 | static SourcePred matchScalarInAggregate() { |
256 | auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { |
257 | if (auto *ArrayT = dyn_cast<ArrayType>(Val: Cur[0]->getType())) |
258 | return V->getType() == ArrayT->getElementType(); |
259 | |
260 | auto *STy = cast<StructType>(Val: Cur[0]->getType()); |
261 | for (int I = 0, E = STy->getNumElements(); I < E; ++I) |
262 | if (STy->getTypeAtIndex(N: I) == V->getType()) |
263 | return true; |
264 | return false; |
265 | }; |
266 | auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *>) { |
267 | if (auto *ArrayT = dyn_cast<ArrayType>(Val: Cur[0]->getType())) |
268 | return makeConstantsWithType(T: ArrayT->getElementType()); |
269 | |
270 | std::vector<Constant *> Result; |
271 | auto *STy = cast<StructType>(Val: Cur[0]->getType()); |
272 | for (int I = 0, E = STy->getNumElements(); I < E; ++I) |
273 | makeConstantsWithType(T: STy->getTypeAtIndex(N: I), Cs&: Result); |
274 | return Result; |
275 | }; |
276 | return {Pred, Make}; |
277 | } |
278 | |
279 | static SourcePred validInsertValueIndex() { |
280 | auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { |
281 | if (auto *CI = dyn_cast<ConstantInt>(Val: V)) |
282 | if (CI->getBitWidth() == 32) { |
283 | Type *Indexed = ExtractValueInst::getIndexedType(Agg: Cur[0]->getType(), |
284 | Idxs: CI->getZExtValue()); |
285 | return Indexed == Cur[1]->getType(); |
286 | } |
287 | return false; |
288 | }; |
289 | auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) { |
290 | std::vector<Constant *> Result; |
291 | auto *Int32Ty = Type::getInt32Ty(C&: Cur[0]->getContext()); |
292 | auto *BaseTy = Cur[0]->getType(); |
293 | int I = 0; |
294 | while (Type *Indexed = ExtractValueInst::getIndexedType(Agg: BaseTy, Idxs: I)) { |
295 | if (Indexed == Cur[1]->getType()) |
296 | Result.push_back(x: ConstantInt::get(Ty: Int32Ty, V: I)); |
297 | ++I; |
298 | } |
299 | return Result; |
300 | }; |
301 | return {Pred, Make}; |
302 | } |
303 | |
304 | OpDescriptor llvm::fuzzerop::insertValueDescriptor(unsigned Weight) { |
305 | auto buildInsert = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) { |
306 | // TODO: It's pretty inefficient to shuffle this all through constants. |
307 | unsigned Idx = cast<ConstantInt>(Val: Srcs[2])->getZExtValue(); |
308 | return InsertValueInst::Create(Agg: Srcs[0], Val: Srcs[1], Idxs: {Idx}, NameStr: "I" , InsertBefore: InsertPt); |
309 | }; |
310 | return { |
311 | .Weight: Weight, |
312 | .SourcePreds: {anyAggregateType(), matchScalarInAggregate(), validInsertValueIndex()}, |
313 | .BuilderFunc: buildInsert}; |
314 | } |
315 | |
316 | OpDescriptor llvm::fuzzerop::(unsigned Weight) { |
317 | auto = [](ArrayRef<Value *> Srcs, |
318 | BasicBlock::iterator InsertPt) { |
319 | return ExtractElementInst::Create(Vec: Srcs[0], Idx: Srcs[1], NameStr: "E" , InsertBefore: InsertPt); |
320 | }; |
321 | // TODO: Try to avoid undefined accesses. |
322 | return {.Weight: Weight, .SourcePreds: {anyVectorType(), anyIntType()}, .BuilderFunc: buildExtract}; |
323 | } |
324 | |
325 | OpDescriptor llvm::fuzzerop::insertElementDescriptor(unsigned Weight) { |
326 | auto buildInsert = [](ArrayRef<Value *> Srcs, BasicBlock::iterator InsertPt) { |
327 | return InsertElementInst::Create(Vec: Srcs[0], NewElt: Srcs[1], Idx: Srcs[2], NameStr: "I" , InsertBefore: InsertPt); |
328 | }; |
329 | // TODO: Try to avoid undefined accesses. |
330 | return {.Weight: Weight, |
331 | .SourcePreds: {anyVectorType(), matchScalarOfFirstType(), anyIntType()}, |
332 | .BuilderFunc: buildInsert}; |
333 | } |
334 | |
335 | static SourcePred validShuffleVectorIndex() { |
336 | auto Pred = [](ArrayRef<Value *> Cur, const Value *V) { |
337 | return ShuffleVectorInst::isValidOperands(V1: Cur[0], V2: Cur[1], Mask: V); |
338 | }; |
339 | auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> Ts) { |
340 | auto *FirstTy = cast<VectorType>(Val: Cur[0]->getType()); |
341 | auto *Int32Ty = Type::getInt32Ty(C&: Cur[0]->getContext()); |
342 | // TODO: It's straighforward to make up reasonable values, but listing them |
343 | // exhaustively would be insane. Come up with a couple of sensible ones. |
344 | return std::vector<Constant *>{ |
345 | PoisonValue::get(T: VectorType::get(ElementType: Int32Ty, EC: FirstTy->getElementCount()))}; |
346 | }; |
347 | return {Pred, Make}; |
348 | } |
349 | |
350 | OpDescriptor llvm::fuzzerop::shuffleVectorDescriptor(unsigned Weight) { |
351 | auto buildShuffle = [](ArrayRef<Value *> Srcs, |
352 | BasicBlock::iterator InsertPt) { |
353 | return new ShuffleVectorInst(Srcs[0], Srcs[1], Srcs[2], "S" , InsertPt); |
354 | }; |
355 | return {.Weight: Weight, |
356 | .SourcePreds: {anyVectorType(), matchFirstType(), validShuffleVectorIndex()}, |
357 | .BuilderFunc: buildShuffle}; |
358 | } |
359 | |