//===-- IRMutator.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/FuzzMutate/IRMutator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/FuzzMutate/Operations.h"
#include "llvm/FuzzMutate/Random.h"
#include "llvm/FuzzMutate/RandomIRBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <map>
#include <optional>

using namespace llvm;

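// Module-level entry point for a strategy: sample one function definition
// uniformly at random (creating new definitions until the module holds at
// least IB.MinFunctionNum of them) and recurse into function-level mutation.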
void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
  auto RS = makeSampler<Function *>(IB.Rand);
  for (Function &F : M)
    if (!F.isDeclaration())
      RS.sample(&F, /*Weight=*/1);

  while (RS.totalWeight() < IB.MinFunctionNum) {
    Function *F = IB.createFunctionDefinition(M);
    RS.sample(F, /*Weight=*/1);
  }
  mutate(*RS.getSelection(), IB);
}

void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  auto Range = make_filter_range(make_pointer_range(F),
                                 [](BasicBlock *BB) { return !BB->isEHPad(); });

  mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
}

void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
}

size_t llvm::IRMutator::getModuleSize(const Module &M) {
  return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
}

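// mutateModule seeds a RandomIRBuilder over the allowed types, samples one
// strategy using the weights reported for the current module size, and lets
// it mutate M.  A rough driver-side usage sketch (assuming the IRMutator and
// TypeGetter declarations from llvm/FuzzMutate/IRMutator.h):
//
//   std::vector<TypeGetter> Types = {...};
//   std::vector<std::unique_ptr<IRMutationStrategy>> Strategies;
//   Strategies.push_back(std::make_unique<InjectorIRStrategy>(
//       InjectorIRStrategy::getDefaultOps()));
//   IRMutator Mutator(std::move(Types), std::move(Strategies));
//   Mutator.mutateModule(M, Seed, MaxSize);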
void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
  std::vector<Type *> Types;
  for (const auto &Getter : AllowedTypes)
    Types.push_back(Getter(M.getContext()));
  RandomIRBuilder IB(Seed, Types);

  size_t CurSize = IRMutator::getModuleSize(M);
  auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
  for (const auto &Strategy : Strategies)
    RS.sample(Strategy.get(),
              Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
  if (RS.totalWeight() == 0)
    return;
  auto Strategy = RS.getSelection();

  Strategy->mutate(M, IB);
}

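// Run DCE over the mutated function with a minimal, locally constructed
// analysis manager so mutation does not leave obviously dead code behind.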
static void eliminateDeadCode(Function &F) {
  FunctionPassManager FPM;
  FPM.addPass(DCEPass());
  FunctionAnalysisManager FAM;
  FAM.registerPass([&] { return TargetLibraryAnalysis(); });
  FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
  FPM.run(F, FAM);
}

void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  IRMutationStrategy::mutate(F, IB);
  eliminateDeadCode(F);
}

std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
  std::vector<fuzzerop::OpDescriptor> Ops;
  describeFuzzerIntOps(Ops);
  describeFuzzerFloatOps(Ops);
  describeFuzzerControlFlowOps(Ops);
  describeFuzzerPointerOps(Ops);
  describeFuzzerAggregateOps(Ops);
  describeFuzzerVectorOps(Ops);
  return Ops;
}

std::optional<fuzzerop::OpDescriptor>
InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
  auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
    return Op.SourcePreds[0].matches({}, Src);
  };
  auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
  if (RS.isEmpty())
    return std::nullopt;
  return *RS;
}

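// Some blocks end in an instruction that must stay glued to the terminator:
// a musttail call, or an intrinsic that has to be immediately followed by
// unreachable.  Treat that instruction as the "effective" terminator so that
// nothing gets inserted between it and the real one.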
static inline Instruction *getEffectiveTerminator(BasicBlock &BB) {
  if (Instruction *I = BB.getTerminatingMustTailCall()) {
    return I;
  } else {
    // Certain intrinsics, such as @llvm.amdgcn.cs.chain, must be immediately
    // followed by an unreachable instruction.
    if (UnreachableInst *UI = dyn_cast<UnreachableInst>(BB.getTerminator())) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI->getPrevNode())) {
        return II;
      }
    }
  }

  return BB.getTerminator();
}

static inline BasicBlock::iterator getEndIterator(BasicBlock &BB) {
  auto End = BB.end();

  if (BB.empty()) {
    return End;
  }

  Instruction *EffectiveTerminator = getEffectiveTerminator(BB);
  if (EffectiveTerminator != BB.getTerminator()) {
    // Adjust range for special cases such as tail call.
    End = std::prev(BB.end());
  }

  return End;
}

static inline iterator_range<BasicBlock::iterator>
getInsertionRange(BasicBlock &BB) {
  auto End = getEndIterator(BB);
  return make_range(BB.getFirstInsertionPt(), End);
}

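// Block-level injection: pick a random insertion point, find or create an
// initial source value defined before it, choose an operation whose first
// predicate accepts that source, materialize any remaining sources, build the
// new instruction, and connect its result to a sink after the insertion point.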
void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  SmallVector<Instruction *, 32> Insts(
      llvm::make_pointer_range(getInsertionRange(BB)));
  if (Insts.size() < 1)
    return;

  // Choose an insertion point for our new instruction.
  size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);

  auto InstsBefore = ArrayRef(Insts).slice(0, IP);
  auto InstsAfter = ArrayRef(Insts).slice(IP);

  // Choose a source, which will be used to constrain the operation selection.
  SmallVector<Value *, 2> Srcs;
  Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));

  // Choose an operation that's constrained to be valid for the type of the
  // source, collect any other sources it needs, and then build it.
  auto OpDesc = chooseOperation(Srcs[0], IB);
  // Bail if no operation was found
  if (!OpDesc)
    return;

  for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
    Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));

  if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP]->getIterator())) {
    // Find a sink and wire up the results of the operation.
    IB.connectToSink(BB, InstsAfter, Op);
  }
}

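// Worked example for the weight formula below: with MaxSize = 10000,
// CurrentSize = 9500 and CurrentWeight = W, the linear term evaluates to
// (-2 * W) * (10000 - 9500 - 1000) / 1000 = W, i.e. deletion becomes as
// likely as everything sampled so far.  With 1000 or more bytes of headroom
// the term is <= 0 and gets clamped to zero; with fewer than 200 bytes left
// the early return below fires and yields 100x the current weight (or 1 if
// the current weight is zero).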
uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
                                          uint64_t CurrentWeight) {
  // If we have fewer than 200 bytes of headroom left, panic and try to always
  // delete.
  if (CurrentSize > MaxSize - 200)
    return CurrentWeight ? CurrentWeight * 100 : 1;
  // Ramp the weight up linearly, from zero when 1k bytes of headroom remain
  // to double the current weight when the size budget is exhausted.
  int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
                 (static_cast<int64_t>(MaxSize) -
                  static_cast<int64_t>(CurrentSize) - 1000) /
                 1000;
  // Clamp negative weights to zero.
  if (Line < 0)
    return 0;
  return Line;
}

void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  auto RS = makeSampler<Instruction *>(IB.Rand);
  for (Instruction &Inst : instructions(F)) {
    // TODO: We can't handle these instructions.
    if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
        isa<PHINode>(Inst))
      continue;

    RS.sample(&Inst, /*Weight=*/1);
  }
  if (RS.isEmpty())
    return;

  // Delete the instruction.
  mutate(*RS.getSelection(), IB);
  // Clean up any dead code that's left over after removing the instruction.
  eliminateDeadCode(F);
}

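// Deleting a value-producing instruction requires a replacement of the same
// type for its remaining users: prefer a value already defined earlier in the
// block, otherwise ask the builder to materialize a fresh source.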
void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
  assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");

  if (Inst.getType()->isVoidTy()) {
    // An instruction with void type (e.g., a store) has no uses to worry
    // about. Just erase it and move on.
    Inst.eraseFromParent();
    return;
  }

  // Otherwise we need to find some other value with the right type to keep the
  // users happy.
  auto Pred = fuzzerop::onlyType(Inst.getType());
  auto RS = makeSampler<Value *>(IB.Rand);
  SmallVector<Instruction *, 32> InstsBefore;
  BasicBlock *BB = Inst.getParent();
  for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
       ++I) {
    if (Pred.matches({}, &*I))
      RS.sample(&*I, /*Weight=*/1);
    InstsBefore.push_back(&*I);
  }
  if (!RS)
    RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);

  Inst.replaceAllUsesWith(RS.getSelection());
  Inst.eraseFromParent();
}

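// Collect a list of small tweaks that keep the IR well formed - flipping
// poison-generating flags (nsw/nuw/exact/inbounds), switching compare
// predicates, toggling fast-math flags, and swapping operands where that is
// still valid - then apply one of them chosen uniformly at random.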
void InstModificationIRStrategy::mutate(Instruction &Inst,
                                        RandomIRBuilder &IB) {
  SmallVector<std::function<void()>, 8> Modifications;
  CmpInst *CI = nullptr;
  GetElementPtrInst *GEP = nullptr;
  switch (Inst.getOpcode()) {
  default:
    break;
  // Add nsw, nuw flags.
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::Sub:
  case Instruction::Shl:
    Modifications.push_back(
        [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
    Modifications.push_back(
        [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
    break;
  case Instruction::ICmp:
    CI = cast<ICmpInst>(&Inst);
    for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
         p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
      Modifications.push_back(
          [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
    }
    break;
  // Add inbounds flag.
  case Instruction::GetElementPtr:
    GEP = cast<GetElementPtrInst>(&Inst);
    Modifications.push_back(
        [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
    break;
  // Add exact flag.
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::LShr:
  case Instruction::AShr:
    Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
    break;

  case Instruction::FCmp:
    CI = cast<FCmpInst>(&Inst);
    for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
         p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
      Modifications.push_back(
          [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
    }
    break;
  }

  // Add fast-math flags if possible.
  if (isa<FPMathOperator>(&Inst)) {
    // Try setting everything unless they are already on.
    Modifications.push_back(
        [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
    // Try unsetting everything unless they are already off.
    Modifications.push_back(
        [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
    // Toggle individual flags by flipping their current value.
    Modifications.push_back(
        [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
    Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
    Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
    Modifications.push_back(
        [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
  }

  // Randomly swap the operands of the instruction.
  std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
  switch (Inst.getOpcode()) {
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::FDiv:
  case Instruction::FRem: {
    // Verify that after the shuffle the second operand is not constant 0.
    Value *Operand = Inst.getOperand(0);
    if (Constant *C = dyn_cast<Constant>(Operand)) {
      if (!C->isZeroValue()) {
        ShuffleItems = {0, 1};
      }
    }
    break;
  }
  case Instruction::Select:
    ShuffleItems = {1, 2};
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::ShuffleVector:
    ShuffleItems = {0, 1};
    break;
  }
  if (ShuffleItems != NoneItem) {
    Modifications.push_back([&Inst, &ShuffleItems]() {
      Value *Op0 = Inst.getOperand(ShuffleItems.first);
      Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
      Inst.setOperand(ShuffleItems.second, Op0);
    });
  }

  auto RS = makeSampler(IB.Rand, Modifications);
  if (RS)
    RS.getSelection()();
}

/// Return a case value that is not already taken, to make sure we don't have
/// two cases with the same value.
static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
                                   uint64_t MaxValue, RandomIRBuilder &IB) {
  uint64_t tmp;
  do {
    tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
  } while (CasesTaken.count(tmp) != 0);
  CasesTaken.insert(tmp);
  return tmp;
}

/// Determines whether a function is unsupported by the current mutator's
/// implementation. The function returns true if any of the following criteria
/// are met:
/// * The function accepts metadata or token types as arguments.
/// * The function has ABI attributes that could cause UB.
/// * The function uses a non-callable CC that may result in UB.
static bool isUnsupportedFunction(Function *F) {
  // Some functions accept metadata type or token type as arguments.
  // We don't call those functions for now.
  // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
  // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
  auto IsUnsupportedTy = [](Type *T) {
    return T->isMetadataTy() || T->isTokenTy();
  };

  if (IsUnsupportedTy(F->getReturnType()) ||
      any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
    return true;
  }

  // ABI attributes must be specified both at the function
  // declaration/definition and call-site, otherwise the
  // behavior may be undefined.
  // We don't call those functions for now to prevent UB from happening.
  auto IsABIAttribute = [](AttributeSet A) {
    static const Attribute::AttrKind ABIAttrs[] = {
        Attribute::StructRet,      Attribute::ByVal,
        Attribute::InAlloca,       Attribute::InReg,
        Attribute::StackAlignment, Attribute::SwiftSelf,
        Attribute::SwiftAsync,     Attribute::SwiftError,
        Attribute::Preallocated,   Attribute::ByRef,
        Attribute::ZExt,           Attribute::SExt};

    return llvm::any_of(ABIAttrs, [&](Attribute::AttrKind kind) {
      return A.hasAttribute(kind);
    });
  };

  auto FuncAttrs = F->getAttributes();
  if (IsABIAttribute(FuncAttrs.getRetAttrs())) {
    return true;
  }
  for (size_t i = 0; i < F->arg_size(); i++) {
    if (IsABIAttribute(FuncAttrs.getParamAttrs(i))) {
      return true;
    }
  }

  // If the calling convention is not callable, calls to the function would be
  // invalid IR.
  if (!isCallableCC(F->getCallingConv())) {
    return true;
  }

  // This intrinsic has specific requirements for its parameters and the caller
  // must adhere to certain calling conventions.
  if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain) {
    return true;
  }

  return false;
}

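// Insert a call to a randomly chosen function in the module, or to a freshly
// created declaration when the selection is null or unsupported.  Arguments
// are found or created to satisfy the callee's parameter types, and a
// non-void result is wired to a sink just like an injected instruction.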
void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  Module *M = BB.getParent()->getParent();
  // If nullptr is selected, we will create a new function declaration.
  SmallVector<Function *, 32> Functions({nullptr});
  for (Function &F : M->functions()) {
    Functions.push_back(&F);
  }

  auto RS = makeSampler(IB.Rand, Functions);
  Function *F = RS.getSelection();

  if (!F || isUnsupportedFunction(F)) {
    F = IB.createFunctionDeclaration(*M);
  }

  FunctionType *FTy = F->getFunctionType();
  SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
  if (!F->arg_empty()) {
    for (Type *ArgTy : FTy->params()) {
      SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
    }
  }
  bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
  auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
                                         BasicBlock::iterator InsertPt) {
    StringRef Name = isRetVoid ? nullptr : "C";
    CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, InsertPt);
    Call->setCallingConv(F->getCallingConv());
    // Don't return this call inst if it returns void, as it can't be sunk.
    return isRetVoid ? nullptr : Call;
  };

  SmallVector<Instruction *, 32> Insts(
      llvm::make_pointer_range(getInsertionRange(BB)));
  if (Insts.size() < 1)
    return;

  // Choose an insertion point for our new call instruction.
  uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);

  auto InstsBefore = ArrayRef(Insts).slice(0, IP);
  auto InstsAfter = ArrayRef(Insts).slice(IP);

  // Collect source values to use as the call's arguments.
  SmallVector<Value *, 2> Srcs;

  for (const auto &Pred : ArrayRef(SourcePreds)) {
    Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
  }

  if (Value *Op = BuilderFunc(Srcs, Insts[IP]->getIterator())) {
    // Find a sink and wire up the results of the operation.
    IB.connectToSink(BB, InstsAfter, Op);
  }
}

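// Split the block at a random instruction: the upper half keeps the existing
// code and receives a fresh terminator (a conditional branch or a switch,
// decided by a coin flip), while the lower half ("Sink") keeps the original
// terminator.  Every newly created block eventually reaches Sink via
// connectBlocksToSink, so the CFG stays connected.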
void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  SmallVector<Instruction *, 32> Insts(
      llvm::make_pointer_range(getInsertionRange(BB)));
  if (Insts.size() < 1)
    return;

  // Choose a point where we split the block.
  uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
  auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);

  // `Sink` inherits the block's terminator, and `Source` gets a BranchInst
  // that jumps directly to `Sink`. Here, we have to create a new terminator
  // for `Source`.
  BasicBlock *Block = Insts[IP]->getParent();
  BasicBlock *Source = Block;
  BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");

  Function *F = BB.getParent();
  LLVMContext &C = F->getParent()->getContext();
  // A coin flip decides whether we insert a branch or a switch.
  if (uniform<uint64_t>(IB.Rand, 0, 1)) {
    // Branch
    BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
    BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
    Value *Cond =
        IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
                              fuzzerop::onlyType(Type::getInt1Ty(C)),
                              /*allowConstant=*/false);
    BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
    // Remove the old terminator.
    ReplaceInstWithInst(Source->getTerminator(), Branch);
    // Connect these blocks to `Sink`.
    connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
  } else {
    // Switch
    // Determine the integer type; it IS possible we use a boolean to switch.
    auto RS =
        makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
                      return Ty->isIntegerTy();
                    }));
    assert(RS && "There is no integer type in all allowed types, is the "
                 "setting correct?");
    Type *Ty = RS.getSelection();
    IntegerType *IntTy = cast<IntegerType>(Ty);

    uint64_t BitSize = IntTy->getBitWidth();
    uint64_t MaxCaseVal =
        (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
    // Create the switch instruction in Block.
    Value *Cond =
        IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
                              fuzzerop::onlyType(IntTy),
                              /*allowConstant=*/false);
    BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
    uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
    NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
    SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
    // Remove the old terminator.
    ReplaceInstWithInst(Source->getTerminator(), Switch);

    // Create the case blocks and assign each one a unique case value.
    SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
    SmallSet<uint64_t, 4> CasesTaken;
    for (uint64_t i = 0; i < NumCases; i++) {
      uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
      BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
      ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
      Switch->addCase(OnValue, CaseBlock);
      Blocks.push_back(CaseBlock);
    }

    // Connect these blocks to `Sink`.
    connectBlocksToSink(Blocks, Sink, IB);
  }
}

/// The caller has to guarantee that these blocks are "empty", i.e. they don't
/// even have a terminator.
void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
                                            BasicBlock *Sink,
                                            RandomIRBuilder &IB) {
  uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
  for (uint64_t i = 0; i < Blocks.size(); i++) {
    // We have at least one block that directly goes to sink.
    CFGToSink ToSink = (i == DirectSinkIdx)
                           ? CFGToSink::DirectSink
                           : static_cast<CFGToSink>(uniform<uint64_t>(
                                 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
    BasicBlock *BB = Blocks[i];
    Function *F = BB->getParent();
    LLVMContext &C = F->getParent()->getContext();
    switch (ToSink) {
    case CFGToSink::Return: {
      Type *RetTy = F->getReturnType();
      Value *RetValue = nullptr;
      if (!RetTy->isVoidTy())
        RetValue =
            IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
      ReturnInst::Create(C, RetValue, BB);
      break;
    }
    case CFGToSink::DirectSink: {
      BranchInst::Create(Sink, BB);
      break;
    }
    case CFGToSink::SinkOrSelfLoop: {
      SmallVector<BasicBlock *, 2> Branches({Sink, BB});
      // A coin flip decides which block is the true branch.
      uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
      Value *Cond = IB.findOrCreateSource(
          *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)),
          /*allowConstant=*/false);
      BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
      break;
    }
    case CFGToSink::EndOfCFGToLink:
      llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
    }
  }
}

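// Insert a PHI node of a random allowed type at the top of BB, feed it one
// incoming value per predecessor (reusing the same value for repeated
// predecessors), and route the PHI into a sink so the new node has a use.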
void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  // Can't insert a PHI node into the entry block.
  if (&BB == &BB.getParent()->getEntryBlock())
    return;
  Type *Ty = IB.randomType();
  PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", BB.begin());

  // Use a map to make sure the same incoming basic block has the same value.
  DenseMap<BasicBlock *, Value *> IncomingValues;
  for (BasicBlock *Pred : predecessors(&BB)) {
    Value *Src = IncomingValues[Pred];
    // If `Pred` is not in the map yet, we'll get a nullptr.
    if (!Src) {
      SmallVector<Instruction *, 32> Insts;
      for (auto I = Pred->begin(); I != Pred->end(); ++I)
        Insts.push_back(&*I);
      // There is no need to inform IB what previously used values are if we
      // are using `onlyType`.
      Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
      IncomingValues[Pred] = Src;
    }
    PHI->addIncoming(Src, Pred);
  }
  SmallVector<Instruction *, 32> InstsAfter(
      llvm::make_pointer_range(getInsertionRange(BB)));
  IB.connectToSink(BB, InstsAfter, PHI);
}

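// Give existing values extra uses: for each block, pick one instruction at
// random and connect its result to a sink later in the same block, skipping
// values (void, token) that cannot be used that way.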
void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
  for (BasicBlock &BB : F) {
    this->mutate(BB, IB);
  }
}
void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  SmallVector<Instruction *, 32> Insts(
      llvm::make_pointer_range(getInsertionRange(BB)));
  if (Insts.size() < 1)
    return;
  // Choose an Instruction to mutate.
  uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
  Instruction *Inst = Insts[Idx];
  // `Idx + 1` so we don't sink to ourselves.
  auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
  Type *Ty = Inst->getType();
  // Don't sink terminators, void function calls, tokens, etc.
  if (!Ty->isVoidTy() && !Ty->isTokenTy())
    // Find a new sink and wire up the results of the operation.
    IB.connectToSink(BB, InstsAfter, Inst);
}

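// Shuffle a block with a randomized Kahn-style topological sort: instructions
// whose in-block operands have all been placed form the current root set, one
// root is drawn at random, and placing it may unlock its users.  The result
// is a random permutation of the block that still respects def-use order, so
// the reassembled block remains valid IR.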
void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
  // A deterministic alternative to SmallPtrSet with the same lookup
  // performance.
  std::map<size_t, Instruction *> AliveInsts;
  std::map<Instruction *, size_t> AliveInstsLookup;
  size_t InsertIdx = 0;
  for (auto &I : make_early_inc_range(
           make_range(BB.getFirstInsertionPt(),
                      getEffectiveTerminator(BB)->getIterator()))) {
    // First gather all instructions that can be shuffled. Don't take the
    // terminator.
    AliveInsts.insert({InsertIdx, &I});
    AliveInstsLookup.insert({&I, InsertIdx++});
    // Then remove these instructions from the block.
    I.removeFromParent();
  }

  // Shuffle these instructions using topological sort.
  // Returns false if all of the current instruction's dependencies in this
  // block have been shuffled. If so, this instruction can be shuffled too.
  auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
    for (Value *O : AliveInsts[Index]->operands()) {
      Instruction *P = dyn_cast<Instruction>(O);
      if (P && AliveInstsLookup.count(P))
        return true;
    }
    return false;
  };
  // Get all alive instructions that depend on the current instruction.
  // Takes Instruction* instead of an index because the instruction is already
  // shuffled.
  auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
    SmallSetVector<size_t, 8> Children;
    for (Value *U : I->users()) {
      if (Instruction *P = dyn_cast<Instruction>(U)) {
        auto It = AliveInstsLookup.find(P);
        if (It != AliveInstsLookup.end())
          Children.insert(It->second);
      }
    }
    return Children;
  };
  SmallSet<size_t, 8> RootIndices;
  SmallVector<Instruction *, 8> Insts;
  for (const auto &[Index, Inst] : AliveInsts) {
    if (!hasAliveParent(Index))
      RootIndices.insert(Index);
  }
  // Topological sort by randomly selecting a node without a parent, i.e. a
  // root.
  while (!RootIndices.empty()) {
    auto RS = makeSampler<size_t>(IB.Rand);
    for (size_t RootIdx : RootIndices)
      RS.sample(RootIdx, 1);
    size_t RootIdx = RS.getSelection();

    RootIndices.erase(RootIdx);
    Instruction *Root = AliveInsts[RootIdx];
    AliveInsts.erase(RootIdx);
    AliveInstsLookup.erase(Root);
    Insts.push_back(Root);

    for (size_t Child : getAliveChildren(Root)) {
      if (!hasAliveParent(Child)) {
        RootIndices.insert(Child);
      }
    }
  }

  Instruction *Terminator = getEffectiveTerminator(BB);
  // Then put the instructions back.
  for (Instruction *I : Insts) {
    I->insertBefore(Terminator->getIterator());
  }
}

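// Helpers for fuzz targets: parseModule treats the raw fuzzer input as
// bitcode and parses it into a Module (an empty or one-byte input simply
// yields a fresh module named "M"), while writeModule serializes the module
// back into the output buffer, returning 0 if it does not fit.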
std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
                                          LLVMContext &Context) {

  if (Size <= 1)
    // We get bogus data given an empty corpus - just create a new module.
    return std::make_unique<Module>("M", Context);

  auto Buffer = MemoryBuffer::getMemBuffer(
      StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
      /*RequiresNullTerminator=*/false);

  SMDiagnostic Err;
  auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
  if (Error E = M.takeError()) {
    errs() << toString(std::move(E)) << "\n";
    return nullptr;
  }
  return std::move(M.get());
}

size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
  std::string Buf;
  {
    raw_string_ostream OS(Buf);
    WriteBitcodeToFile(M, OS);
  }
  if (Buf.size() > MaxSize)
    return 0;
  memcpy(Dest, Buf.data(), Buf.size());
  return Buf.size();
}

std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
                                             LLVMContext &Context) {
  auto M = parseModule(Data, Size, Context);
  if (!M || verifyModule(*M, &errs()))
    return nullptr;

  return M;
}