1//===-- IRMutator.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/FuzzMutate/IRMutator.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SmallSet.h"
12#include "llvm/Analysis/TargetLibraryInfo.h"
13#include "llvm/Bitcode/BitcodeReader.h"
14#include "llvm/Bitcode/BitcodeWriter.h"
15#include "llvm/FuzzMutate/Operations.h"
16#include "llvm/FuzzMutate/Random.h"
17#include "llvm/FuzzMutate/RandomIRBuilder.h"
18#include "llvm/IR/BasicBlock.h"
19#include "llvm/IR/FMF.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/InstIterator.h"
22#include "llvm/IR/Instructions.h"
23#include "llvm/IR/Module.h"
24#include "llvm/IR/Operator.h"
25#include "llvm/IR/PassInstrumentation.h"
26#include "llvm/IR/Verifier.h"
27#include "llvm/Support/MemoryBuffer.h"
28#include "llvm/Support/SourceMgr.h"
29#include "llvm/Transforms/Scalar/DCE.h"
30#include "llvm/Transforms/Utils/BasicBlockUtils.h"
31#include <map>
32#include <optional>
33
34using namespace llvm;
35
36void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
37 auto RS = makeSampler<Function *>(RandGen&: IB.Rand);
38 for (Function &F : M)
39 if (!F.isDeclaration())
40 RS.sample(Item: &F, /*Weight=*/1);
41
42 while (RS.totalWeight() < IB.MinFunctionNum) {
43 Function *F = IB.createFunctionDefinition(M);
44 RS.sample(Item: F, /*Weight=*/1);
45 }
46 mutate(F&: *RS.getSelection(), IB);
47}
48
49void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
50 auto Range = make_filter_range(Range: make_pointer_range(Range&: F),
51 Pred: [](BasicBlock *BB) { return !BB->isEHPad(); });
52
53 mutate(BB&: *makeSampler(RandGen&: IB.Rand, Items&: Range).getSelection(), IB);
54}
55
56void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
57 mutate(I&: *makeSampler(RandGen&: IB.Rand, Items: make_pointer_range(Range&: BB)).getSelection(), IB);
58}
59
60size_t llvm::IRMutator::getModuleSize(const Module &M) {
61 return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
62}
63
64void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
65 std::vector<Type *> Types;
66 for (const auto &Getter : AllowedTypes)
67 Types.push_back(x: Getter(M.getContext()));
68 RandomIRBuilder IB(Seed, Types);
69
70 size_t CurSize = IRMutator::getModuleSize(M);
71 auto RS = makeSampler<IRMutationStrategy *>(RandGen&: IB.Rand);
72 for (const auto &Strategy : Strategies)
73 RS.sample(Item: Strategy.get(),
74 Weight: Strategy->getWeight(CurrentSize: CurSize, MaxSize, CurrentWeight: RS.totalWeight()));
75 if (RS.totalWeight() == 0)
76 return;
77 auto Strategy = RS.getSelection();
78
79 Strategy->mutate(M, IB);
80}
81
82static void eliminateDeadCode(Function &F) {
83 FunctionPassManager FPM;
84 FPM.addPass(Pass: DCEPass());
85 FunctionAnalysisManager FAM;
86 FAM.registerPass(PassBuilder: [&] { return TargetLibraryAnalysis(); });
87 FAM.registerPass(PassBuilder: [&] { return PassInstrumentationAnalysis(); });
88 FPM.run(IR&: F, AM&: FAM);
89}
90
91void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
92 IRMutationStrategy::mutate(F, IB);
93 eliminateDeadCode(F);
94}
95
96std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
97 std::vector<fuzzerop::OpDescriptor> Ops;
98 describeFuzzerIntOps(Ops);
99 describeFuzzerFloatOps(Ops);
100 describeFuzzerControlFlowOps(Ops);
101 describeFuzzerPointerOps(Ops);
102 describeFuzzerAggregateOps(Ops);
103 describeFuzzerVectorOps(Ops);
104 return Ops;
105}
106
107std::optional<fuzzerop::OpDescriptor>
108InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
109 auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
110 return Op.SourcePreds[0].matches(Cur: {}, New: Src);
111 };
112 auto RS = makeSampler(RandGen&: IB.Rand, Items: make_filter_range(Range&: Operations, Pred: OpMatchesPred));
113 if (RS.isEmpty())
114 return std::nullopt;
115 return *RS;
116}
117
118static inline iterator_range<BasicBlock::iterator>
119getInsertionRange(BasicBlock &BB) {
120 auto End = BB.getTerminatingMustTailCall() ? std::prev(x: BB.end()) : BB.end();
121 return make_range(x: BB.getFirstInsertionPt(), y: End);
122}
123
124void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
125 SmallVector<Instruction *, 32> Insts;
126 for (Instruction &I : getInsertionRange(BB))
127 Insts.push_back(Elt: &I);
128 if (Insts.size() < 1)
129 return;
130
131 // Choose an insertion point for our new instruction.
132 size_t IP = uniform<size_t>(Gen&: IB.Rand, Min: 0, Max: Insts.size() - 1);
133
134 auto InstsBefore = ArrayRef(Insts).slice(N: 0, M: IP);
135 auto InstsAfter = ArrayRef(Insts).slice(N: IP);
136
137 // Choose a source, which will be used to constrain the operation selection.
138 SmallVector<Value *, 2> Srcs;
139 Srcs.push_back(Elt: IB.findOrCreateSource(BB, Insts: InstsBefore));
140
141 // Choose an operation that's constrained to be valid for the type of the
142 // source, collect any other sources it needs, and then build it.
143 auto OpDesc = chooseOperation(Src: Srcs[0], IB);
144 // Bail if no operation was found
145 if (!OpDesc)
146 return;
147
148 for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(N: 1))
149 Srcs.push_back(Elt: IB.findOrCreateSource(BB, Insts: InstsBefore, Srcs, Pred));
150
151 if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
152 // Find a sink and wire up the results of the operation.
153 IB.connectToSink(BB, Insts: InstsAfter, V: Op);
154 }
155}
156
157uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
158 uint64_t CurrentWeight) {
159 // If we have less than 200 bytes, panic and try to always delete.
160 if (CurrentSize > MaxSize - 200)
161 return CurrentWeight ? CurrentWeight * 100 : 1;
162 // Draw a line starting from when we only have 1k left and increasing linearly
163 // to double the current weight.
164 int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
165 (static_cast<int64_t>(MaxSize) -
166 static_cast<int64_t>(CurrentSize) - 1000) /
167 1000;
168 // Clamp negative weights to zero.
169 if (Line < 0)
170 return 0;
171 return Line;
172}
173
174void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
175 auto RS = makeSampler<Instruction *>(RandGen&: IB.Rand);
176 for (Instruction &Inst : instructions(F)) {
177 // TODO: We can't handle these instructions.
178 if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
179 isa<PHINode>(Val: Inst))
180 continue;
181
182 RS.sample(Item: &Inst, /*Weight=*/1);
183 }
184 if (RS.isEmpty())
185 return;
186
187 // Delete the instruction.
188 mutate(Inst&: *RS.getSelection(), IB);
189 // Clean up any dead code that's left over after removing the instruction.
190 eliminateDeadCode(F);
191}
192
193void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
194 assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
195
196 if (Inst.getType()->isVoidTy()) {
197 // Instructions with void type (ie, store) have no uses to worry about. Just
198 // erase it and move on.
199 Inst.eraseFromParent();
200 return;
201 }
202
203 // Otherwise we need to find some other value with the right type to keep the
204 // users happy.
205 auto Pred = fuzzerop::onlyType(Only: Inst.getType());
206 auto RS = makeSampler<Value *>(RandGen&: IB.Rand);
207 SmallVector<Instruction *, 32> InstsBefore;
208 BasicBlock *BB = Inst.getParent();
209 for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
210 ++I) {
211 if (Pred.matches(Cur: {}, New: &*I))
212 RS.sample(Item: &*I, /*Weight=*/1);
213 InstsBefore.push_back(Elt: &*I);
214 }
215 if (!RS)
216 RS.sample(Item: IB.newSource(BB&: *BB, Insts: InstsBefore, Srcs: {}, Pred), /*Weight=*/1);
217
218 Inst.replaceAllUsesWith(V: RS.getSelection());
219 Inst.eraseFromParent();
220}
221
222void InstModificationIRStrategy::mutate(Instruction &Inst,
223 RandomIRBuilder &IB) {
224 SmallVector<std::function<void()>, 8> Modifications;
225 CmpInst *CI = nullptr;
226 GetElementPtrInst *GEP = nullptr;
227 switch (Inst.getOpcode()) {
228 default:
229 break;
230 // Add nsw, nuw flag
231 case Instruction::Add:
232 case Instruction::Mul:
233 case Instruction::Sub:
234 case Instruction::Shl:
235 Modifications.push_back(
236 Elt: [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
237 Modifications.push_back(
238 Elt: [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
239 break;
240 case Instruction::ICmp:
241 CI = cast<ICmpInst>(Val: &Inst);
242 for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
243 p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
244 Modifications.push_back(
245 Elt: [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
246 }
247 break;
248 // Add inbound flag.
249 case Instruction::GetElementPtr:
250 GEP = cast<GetElementPtrInst>(Val: &Inst);
251 Modifications.push_back(
252 Elt: [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
253 break;
254 // Add exact flag.
255 case Instruction::UDiv:
256 case Instruction::SDiv:
257 case Instruction::LShr:
258 case Instruction::AShr:
259 Modifications.push_back(Elt: [&Inst] { Inst.setIsExact(!Inst.isExact()); });
260 break;
261
262 case Instruction::FCmp:
263 CI = cast<FCmpInst>(Val: &Inst);
264 for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
265 p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
266 Modifications.push_back(
267 Elt: [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
268 }
269 break;
270 }
271
272 // Add fast math flag if possible.
273 if (isa<FPMathOperator>(Val: &Inst)) {
274 // Try setting everything unless they are already on.
275 Modifications.push_back(
276 Elt: [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
277 // Try unsetting everything unless they are already off.
278 Modifications.push_back(
279 Elt: [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
280 // Individual setting by flipping the bit
281 Modifications.push_back(
282 Elt: [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
283 Modifications.push_back(Elt: [&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
284 Modifications.push_back(Elt: [&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
285 Modifications.push_back(
286 Elt: [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
287 Modifications.push_back(
288 Elt: [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
289 Modifications.push_back(
290 Elt: [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
291 Modifications.push_back(
292 Elt: [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
293 }
294
295 // Randomly switch operands of instructions
296 std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
297 switch (Inst.getOpcode()) {
298 case Instruction::SDiv:
299 case Instruction::UDiv:
300 case Instruction::SRem:
301 case Instruction::URem:
302 case Instruction::FDiv:
303 case Instruction::FRem: {
304 // Verify that the after shuffle the second operand is not
305 // constant 0.
306 Value *Operand = Inst.getOperand(i: 0);
307 if (Constant *C = dyn_cast<Constant>(Val: Operand)) {
308 if (!C->isZeroValue()) {
309 ShuffleItems = {0, 1};
310 }
311 }
312 break;
313 }
314 case Instruction::Select:
315 ShuffleItems = {1, 2};
316 break;
317 case Instruction::Add:
318 case Instruction::Sub:
319 case Instruction::Mul:
320 case Instruction::Shl:
321 case Instruction::LShr:
322 case Instruction::AShr:
323 case Instruction::And:
324 case Instruction::Or:
325 case Instruction::Xor:
326 case Instruction::FAdd:
327 case Instruction::FSub:
328 case Instruction::FMul:
329 case Instruction::ICmp:
330 case Instruction::FCmp:
331 case Instruction::ShuffleVector:
332 ShuffleItems = {0, 1};
333 break;
334 }
335 if (ShuffleItems != NoneItem) {
336 Modifications.push_back(Elt: [&Inst, &ShuffleItems]() {
337 Value *Op0 = Inst.getOperand(i: ShuffleItems.first);
338 Inst.setOperand(i: ShuffleItems.first, Val: Inst.getOperand(i: ShuffleItems.second));
339 Inst.setOperand(i: ShuffleItems.second, Val: Op0);
340 });
341 }
342
343 auto RS = makeSampler(RandGen&: IB.Rand, Items&: Modifications);
344 if (RS)
345 RS.getSelection()();
346}
347
348/// Return a case value that is not already taken to make sure we don't have two
349/// cases with same value.
350static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
351 uint64_t MaxValue, RandomIRBuilder &IB) {
352 uint64_t tmp;
353 do {
354 tmp = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: MaxValue);
355 } while (CasesTaken.count(V: tmp) != 0);
356 CasesTaken.insert(V: tmp);
357 return tmp;
358}
359
360void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
361 Module *M = BB.getParent()->getParent();
362 // If nullptr is selected, we will create a new function declaration.
363 SmallVector<Function *, 32> Functions({nullptr});
364 for (Function &F : M->functions()) {
365 Functions.push_back(Elt: &F);
366 }
367
368 auto RS = makeSampler(RandGen&: IB.Rand, Items&: Functions);
369 Function *F = RS.getSelection();
370 // Some functions accept metadata type or token type as arguments.
371 // We don't call those functions for now.
372 // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
373 // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
374 auto IsUnsupportedTy = [](Type *T) {
375 return T->isMetadataTy() || T->isTokenTy();
376 };
377 if (!F || IsUnsupportedTy(F->getReturnType()) ||
378 any_of(Range: F->getFunctionType()->params(), P: IsUnsupportedTy)) {
379 F = IB.createFunctionDeclaration(M&: *M);
380 }
381
382 FunctionType *FTy = F->getFunctionType();
383 SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
384 if (!F->arg_empty()) {
385 for (Type *ArgTy : FTy->params()) {
386 SourcePreds.push_back(Elt: fuzzerop::onlyType(Only: ArgTy));
387 }
388 }
389 bool isRetVoid = (F->getReturnType() == Type::getVoidTy(C&: M->getContext()));
390 auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
391 Instruction *Inst) {
392 StringRef Name = isRetVoid ? nullptr : "C";
393 CallInst *Call = CallInst::Create(Ty: FTy, Func: F, Args: Srcs, NameStr: Name, InsertBefore: Inst);
394 // Don't return this call inst if it return void as it can't be sinked.
395 return isRetVoid ? nullptr : Call;
396 };
397
398 SmallVector<Instruction *, 32> Insts;
399 for (Instruction &I : getInsertionRange(BB))
400 Insts.push_back(Elt: &I);
401 if (Insts.size() < 1)
402 return;
403
404 // Choose an insertion point for our new call instruction.
405 uint64_t IP = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: Insts.size() - 1);
406
407 auto InstsBefore = ArrayRef(Insts).slice(N: 0, M: IP);
408 auto InstsAfter = ArrayRef(Insts).slice(N: IP);
409
410 // Choose a source, which will be used to constrain the operation selection.
411 SmallVector<Value *, 2> Srcs;
412
413 for (const auto &Pred : ArrayRef(SourcePreds)) {
414 Srcs.push_back(Elt: IB.findOrCreateSource(BB, Insts: InstsBefore, Srcs, Pred));
415 }
416
417 if (Value *Op = BuilderFunc(Srcs, Insts[IP])) {
418 // Find a sink and wire up the results of the operation.
419 IB.connectToSink(BB, Insts: InstsAfter, V: Op);
420 }
421}
422
423void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
424 SmallVector<Instruction *, 32> Insts;
425 for (Instruction &I : getInsertionRange(BB))
426 Insts.push_back(Elt: &I);
427 if (Insts.size() < 1)
428 return;
429
430 // Choose a point where we split the block.
431 uint64_t IP = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: Insts.size() - 1);
432 auto InstsBeforeSplit = ArrayRef(Insts).slice(N: 0, M: IP);
433
434 // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
435 // directly jumps to `Sink`. Here, we have to create a new terminator for
436 // `Source`.
437 BasicBlock *Block = Insts[IP]->getParent();
438 BasicBlock *Source = Block;
439 BasicBlock *Sink = Block->splitBasicBlock(I: Insts[IP], BBName: "BB");
440
441 Function *F = BB.getParent();
442 LLVMContext &C = F->getParent()->getContext();
443 // A coin decides if it is branch or switch
444 if (uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: 1)) {
445 // Branch
446 BasicBlock *IfTrue = BasicBlock::Create(Context&: C, Name: "T", Parent: F);
447 BasicBlock *IfFalse = BasicBlock::Create(Context&: C, Name: "F", Parent: F);
448 Value *Cond =
449 IB.findOrCreateSource(BB&: *Source, Insts: InstsBeforeSplit, Srcs: {},
450 Pred: fuzzerop::onlyType(Only: Type::getInt1Ty(C)), allowConstant: false);
451 BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
452 // Remove the old terminator.
453 ReplaceInstWithInst(From: Source->getTerminator(), To: Branch);
454 // Connect these blocks to `Sink`
455 connectBlocksToSink(Blocks: {IfTrue, IfFalse}, Sink, IB);
456 } else {
457 // Switch
458 // Determine Integer type, it IS possible we use a boolean to switch.
459 auto RS =
460 makeSampler(RandGen&: IB.Rand, Items: make_filter_range(Range&: IB.KnownTypes, Pred: [](Type *Ty) {
461 return Ty->isIntegerTy();
462 }));
463 assert(RS && "There is no integer type in all allowed types, is the "
464 "setting correct?");
465 Type *Ty = RS.getSelection();
466 IntegerType *IntTy = cast<IntegerType>(Val: Ty);
467
468 uint64_t BitSize = IntTy->getBitWidth();
469 uint64_t MaxCaseVal =
470 (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
471 // Create Switch inst in Block
472 Value *Cond = IB.findOrCreateSource(BB&: *Source, Insts: InstsBeforeSplit, Srcs: {},
473 Pred: fuzzerop::onlyType(Only: IntTy), allowConstant: false);
474 BasicBlock *DefaultBlock = BasicBlock::Create(Context&: C, Name: "SW_D", Parent: F);
475 uint64_t NumCases = uniform<uint64_t>(Gen&: IB.Rand, Min: 1, Max: MaxNumCases);
476 NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
477 SwitchInst *Switch = SwitchInst::Create(Value: Cond, Default: DefaultBlock, NumCases);
478 // Remove the old terminator.
479 ReplaceInstWithInst(From: Source->getTerminator(), To: Switch);
480
481 // Create blocks, for each block assign a case value.
482 SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
483 SmallSet<uint64_t, 4> CasesTaken;
484 for (uint64_t i = 0; i < NumCases; i++) {
485 uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxValue: MaxCaseVal, IB);
486 BasicBlock *CaseBlock = BasicBlock::Create(Context&: C, Name: "SW_C", Parent: F);
487 ConstantInt *OnValue = ConstantInt::get(Ty: IntTy, V: CaseVal);
488 Switch->addCase(OnVal: OnValue, Dest: CaseBlock);
489 Blocks.push_back(Elt: CaseBlock);
490 }
491
492 // Connect these blocks to `Sink`
493 connectBlocksToSink(Blocks, Sink, IB);
494 }
495}
496
497/// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
498/// even have terminator.
499void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
500 BasicBlock *Sink,
501 RandomIRBuilder &IB) {
502 uint64_t DirectSinkIdx = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: Blocks.size() - 1);
503 for (uint64_t i = 0; i < Blocks.size(); i++) {
504 // We have at least one block that directly goes to sink.
505 CFGToSink ToSink = (i == DirectSinkIdx)
506 ? CFGToSink::DirectSink
507 : static_cast<CFGToSink>(uniform<uint64_t>(
508 Gen&: IB.Rand, Min: 0, Max: CFGToSink::EndOfCFGToLink - 1));
509 BasicBlock *BB = Blocks[i];
510 Function *F = BB->getParent();
511 LLVMContext &C = F->getParent()->getContext();
512 switch (ToSink) {
513 case CFGToSink::Return: {
514 Type *RetTy = F->getReturnType();
515 Value *RetValue = nullptr;
516 if (!RetTy->isVoidTy())
517 RetValue =
518 IB.findOrCreateSource(BB&: *BB, Insts: {}, Srcs: {}, Pred: fuzzerop::onlyType(Only: RetTy));
519 ReturnInst::Create(C, retVal: RetValue, InsertBefore: BB);
520 break;
521 }
522 case CFGToSink::DirectSink: {
523 BranchInst::Create(IfTrue: Sink, InsertBefore: BB);
524 break;
525 }
526 case CFGToSink::SinkOrSelfLoop: {
527 SmallVector<BasicBlock *, 2> Branches({Sink, BB});
528 // A coin decides which block is true branch.
529 uint64_t coin = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: 1);
530 Value *Cond = IB.findOrCreateSource(
531 BB&: *BB, Insts: {}, Srcs: {}, Pred: fuzzerop::onlyType(Only: Type::getInt1Ty(C)), allowConstant: false);
532 BranchInst::Create(IfTrue: Branches[coin], IfFalse: Branches[1 - coin], Cond, InsertBefore: BB);
533 break;
534 }
535 case CFGToSink::EndOfCFGToLink:
536 llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
537 }
538 }
539}
540
541void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
542 // Can't insert PHI node to entry node.
543 if (&BB == &BB.getParent()->getEntryBlock())
544 return;
545 Type *Ty = IB.randomType();
546 PHINode *PHI = PHINode::Create(Ty, NumReservedValues: llvm::pred_size(BB: &BB), NameStr: "", InsertBefore: &BB.front());
547
548 // Use a map to make sure the same incoming basic block has the same value.
549 DenseMap<BasicBlock *, Value *> IncomingValues;
550 for (BasicBlock *Pred : predecessors(BB: &BB)) {
551 Value *Src = IncomingValues[Pred];
552 // If `Pred` is not in the map yet, we'll get a nullptr.
553 if (!Src) {
554 SmallVector<Instruction *, 32> Insts;
555 for (auto I = Pred->begin(); I != Pred->end(); ++I)
556 Insts.push_back(Elt: &*I);
557 // There is no need to inform IB what previously used values are if we are
558 // using `onlyType`
559 Src = IB.findOrCreateSource(BB&: *Pred, Insts, Srcs: {}, Pred: fuzzerop::onlyType(Only: Ty));
560 IncomingValues[Pred] = Src;
561 }
562 PHI->addIncoming(V: Src, BB: Pred);
563 }
564 SmallVector<Instruction *, 32> InstsAfter;
565 for (Instruction &I : getInsertionRange(BB))
566 InstsAfter.push_back(Elt: &I);
567 IB.connectToSink(BB, Insts: InstsAfter, V: PHI);
568}
569
570void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
571 for (BasicBlock &BB : F) {
572 this->mutate(BB, IB);
573 }
574}
575void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
576 SmallVector<Instruction *, 32> Insts;
577 for (Instruction &I : getInsertionRange(BB))
578 Insts.push_back(Elt: &I);
579 if (Insts.size() < 1)
580 return;
581 // Choose an Instruction to mutate.
582 uint64_t Idx = uniform<uint64_t>(Gen&: IB.Rand, Min: 0, Max: Insts.size() - 1);
583 Instruction *Inst = Insts[Idx];
584 // `Idx + 1` so we don't sink to ourselves.
585 auto InstsAfter = ArrayRef(Insts).slice(N: Idx + 1);
586 Type *Ty = Inst->getType();
587 // Don't sink terminators, void function calls, token, etc.
588 if (!Ty->isVoidTy() && !Ty->isTokenTy())
589 // Find a new sink and wire up the results of the operation.
590 IB.connectToSink(BB, Insts: InstsAfter, V: Inst);
591}
592
593void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
594 // A deterministic alternative to SmallPtrSet with the same lookup
595 // performance.
596 std::map<size_t, Instruction *> AliveInsts;
597 std::map<Instruction *, size_t> AliveInstsLookup;
598 size_t InsertIdx = 0;
599 for (auto &I : make_early_inc_range(Range: make_range(
600 x: BB.getFirstInsertionPt(), y: BB.getTerminator()->getIterator()))) {
601 // First gather all instructions that can be shuffled. Don't take
602 // terminator.
603 AliveInsts.insert(x: {InsertIdx, &I});
604 AliveInstsLookup.insert(x: {&I, InsertIdx++});
605 // Then remove these instructions from the block
606 I.removeFromParent();
607 }
608
609 // Shuffle these instructions using topological sort.
610 // Returns false if all current instruction's dependencies in this block have
611 // been shuffled. If so, this instruction can be shuffled too.
612 auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
613 for (Value *O : AliveInsts[Index]->operands()) {
614 Instruction *P = dyn_cast<Instruction>(Val: O);
615 if (P && AliveInstsLookup.count(x: P))
616 return true;
617 }
618 return false;
619 };
620 // Get all alive instructions that depend on the current instruction.
621 // Takes Instruction* instead of index because the instruction is already
622 // shuffled.
623 auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
624 SmallSetVector<size_t, 8> Children;
625 for (Value *U : I->users()) {
626 Instruction *P = dyn_cast<Instruction>(Val: U);
627 if (P && AliveInstsLookup.count(x: P))
628 Children.insert(X: AliveInstsLookup[P]);
629 }
630 return Children;
631 };
632 SmallSet<size_t, 8> RootIndices;
633 SmallVector<Instruction *, 8> Insts;
634 for (const auto &[Index, Inst] : AliveInsts) {
635 if (!hasAliveParent(Index))
636 RootIndices.insert(V: Index);
637 }
638 // Topological sort by randomly selecting a node without a parent, or root.
639 while (!RootIndices.empty()) {
640 auto RS = makeSampler<size_t>(RandGen&: IB.Rand);
641 for (size_t RootIdx : RootIndices)
642 RS.sample(Item: RootIdx, Weight: 1);
643 size_t RootIdx = RS.getSelection();
644
645 RootIndices.erase(V: RootIdx);
646 Instruction *Root = AliveInsts[RootIdx];
647 AliveInsts.erase(x: RootIdx);
648 AliveInstsLookup.erase(x: Root);
649 Insts.push_back(Elt: Root);
650
651 for (size_t Child : getAliveChildren(Root)) {
652 if (!hasAliveParent(Child)) {
653 RootIndices.insert(V: Child);
654 }
655 }
656 }
657
658 Instruction *Terminator = BB.getTerminator();
659 // Then put instructions back.
660 for (Instruction *I : Insts) {
661 I->insertBefore(InsertPos: Terminator);
662 }
663}
664
665std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
666 LLVMContext &Context) {
667
668 if (Size <= 1)
669 // We get bogus data given an empty corpus - just create a new module.
670 return std::make_unique<Module>(args: "M", args&: Context);
671
672 auto Buffer = MemoryBuffer::getMemBuffer(
673 InputData: StringRef(reinterpret_cast<const char *>(Data), Size), BufferName: "Fuzzer input",
674 /*RequiresNullTerminator=*/false);
675
676 SMDiagnostic Err;
677 auto M = parseBitcodeFile(Buffer: Buffer->getMemBufferRef(), Context);
678 if (Error E = M.takeError()) {
679 errs() << toString(E: std::move(E)) << "\n";
680 return nullptr;
681 }
682 return std::move(M.get());
683}
684
685size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
686 std::string Buf;
687 {
688 raw_string_ostream OS(Buf);
689 WriteBitcodeToFile(M, Out&: OS);
690 }
691 if (Buf.size() > MaxSize)
692 return 0;
693 memcpy(dest: Dest, src: Buf.data(), n: Buf.size());
694 return Buf.size();
695}
696
697std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
698 LLVMContext &Context) {
699 auto M = parseModule(Data, Size, Context);
700 if (!M || verifyModule(M: *M, OS: &errs()))
701 return nullptr;
702
703 return M;
704}
705