1//===-- RandomIRBuilder.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/FuzzMutate/RandomIRBuilder.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/FuzzMutate/OpDescriptor.h"
12#include "llvm/FuzzMutate/Random.h"
13#include "llvm/IR/BasicBlock.h"
14#include "llvm/IR/Constants.h"
15#include "llvm/IR/DataLayout.h"
16#include "llvm/IR/Dominators.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/Instructions.h"
19#include "llvm/IR/Module.h"
20
21using namespace llvm;
22using namespace fuzzerop;
23
24/// Return a vector of Blocks that dominates this block, excluding current
25/// block.
26static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
27 std::vector<BasicBlock *> ret;
28 DominatorTree DT(*BB->getParent());
29 DomTreeNode *Node = DT.getNode(BB);
30 // It's possible that an orphan block is not in the dom tree. In that case we
31 // just return nothing.
32 if (!Node)
33 return ret;
34 Node = Node->getIDom();
35 while (Node && Node->getBlock()) {
36 ret.push_back(x: Node->getBlock());
37 // Get parent block.
38 Node = Node->getIDom();
39 }
40 return ret;
41}
42
43/// Return a vector of Blocks that is dominated by this block, excluding current
44/// block
45static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
46 DominatorTree DT(*BB->getParent());
47 std::vector<BasicBlock *> ret;
48 DomTreeNode *Parent = DT.getNode(BB);
49 // It's possible that an orphan block is not in the dom tree. In that case we
50 // just return nothing.
51 if (!Parent)
52 return ret;
53 for (DomTreeNode *Child : Parent->children())
54 ret.push_back(x: Child->getBlock());
55 uint64_t Idx = 0;
56 while (Idx < ret.size()) {
57 DomTreeNode *Node = DT[ret[Idx]];
58 Idx++;
59 for (DomTreeNode *Child : Node->children())
60 ret.push_back(x: Child->getBlock());
61 }
62 return ret;
63}
64
65AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
66 Value *Init) {
67 /// TODO: For all Allocas, maybe allocate an array.
68 BasicBlock *EntryBB = &F->getEntryBlock();
69 const DataLayout &DL = F->getDataLayout();
70 AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
71 EntryBB->getFirstInsertionPt());
72 if (Init)
73 new StoreInst(Init, Alloca, std::next(x: Alloca->getIterator()));
74 return Alloca;
75}
76
77std::pair<GlobalVariable *, bool>
78RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
79 fuzzerop::SourcePred Pred) {
80 auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
81 // Can't directly compare GV's type, as it would be a pointer to the actual
82 // type.
83 return Pred.matches(Cur: Srcs, New: PoisonValue::get(T: GV->getValueType()));
84 };
85 bool DidCreate = false;
86 SmallVector<GlobalVariable *, 4> GlobalVars(
87 llvm::make_pointer_range(Range: M->globals()));
88 auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: GlobalVars, Pred: MatchesPred));
89 RS.sample(Item: nullptr, Weight: 1);
90 GlobalVariable *GV = RS.getSelection();
91 if (!GV) {
92 DidCreate = true;
93 using LinkageTypes = GlobalVariable::LinkageTypes;
94 auto TRS = makeSampler<Constant *>(RandGen&: Rand);
95 TRS.sample(Items: Pred.generate(Cur: Srcs, BaseTypes: KnownTypes));
96 Constant *Init = TRS.getSelection();
97 Type *Ty = Init->getType();
98 GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
99 "G", nullptr,
100 GlobalValue::ThreadLocalMode::NotThreadLocal,
101 M->getDataLayout().getDefaultGlobalsAddressSpace());
102 }
103 return {GV, DidCreate};
104}
105
106Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
107 ArrayRef<Instruction *> Insts) {
108 return findOrCreateSource(BB, Insts, Srcs: {}, Pred: anyType());
109}
110
111Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
112 ArrayRef<Instruction *> Insts,
113 ArrayRef<Value *> Srcs,
114 SourcePred Pred,
115 bool allowConstant) {
116 auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Cur: Srcs, New: V); };
117 SmallVector<uint64_t, 8> SrcTys;
118 for (uint64_t i = 0; i < EndOfValueSource; i++)
119 SrcTys.push_back(Elt: i);
120 std::shuffle(first: SrcTys.begin(), last: SrcTys.end(), g&: Rand);
121 for (uint64_t SrcTy : SrcTys) {
122 switch (SrcTy) {
123 case SrcFromInstInCurBlock: {
124 auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Insts, Pred: MatchesPred));
125 if (!RS.isEmpty()) {
126 return RS.getSelection();
127 }
128 break;
129 }
130 case FunctionArgument: {
131 Function *F = BB.getParent();
132 SmallVector<Argument *, 8> Args;
133 for (uint64_t i = 0; i < F->arg_size(); i++) {
134 Args.push_back(Elt: F->getArg(i));
135 }
136 auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Args, Pred: MatchesPred));
137 if (!RS.isEmpty()) {
138 return RS.getSelection();
139 }
140 break;
141 }
142 case InstInDominator: {
143 auto Dominators = getDominators(BB: &BB);
144 std::shuffle(first: Dominators.begin(), last: Dominators.end(), g&: Rand);
145 for (BasicBlock *Dom : Dominators) {
146 SmallVector<Instruction *, 16> Instructions(
147 llvm::make_pointer_range(Range&: *Dom));
148 auto RS =
149 makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Instructions, Pred: MatchesPred));
150 // Also consider choosing no source, meaning we want a new one.
151 if (!RS.isEmpty()) {
152 return RS.getSelection();
153 }
154 }
155 break;
156 }
157 case SrcFromGlobalVariable: {
158 Module *M = BB.getParent()->getParent();
159 auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
160 Type *Ty = GV->getValueType();
161 LoadInst *LoadGV = nullptr;
162 if (BB.getTerminator()) {
163 LoadGV = new LoadInst(Ty, GV, "LGV", BB.getFirstInsertionPt());
164 } else {
165 LoadGV = new LoadInst(Ty, GV, "LGV", &BB);
166 }
167 // Because we might be generating new values, we have to check if it
168 // matches again.
169 if (DidCreate) {
170 if (Pred.matches(Cur: Srcs, New: LoadGV)) {
171 return LoadGV;
172 }
173 LoadGV->eraseFromParent();
174 // If no one is using this GlobalVariable, delete it too.
175 if (GV->use_empty()) {
176 GV->eraseFromParent();
177 }
178 }
179 break;
180 }
181 case NewConstOrStack: {
182 return newSource(BB, Insts, Srcs, Pred, allowConstant);
183 }
184 default:
185 case EndOfValueSource: {
186 llvm_unreachable("EndOfValueSource executed");
187 }
188 }
189 }
190 llvm_unreachable("Can't find a source");
191}
192
193Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
194 ArrayRef<Value *> Srcs, SourcePred Pred,
195 bool allowConstant) {
196 // Generate some constants to choose from.
197 auto RS = makeSampler<Value *>(RandGen&: Rand);
198 RS.sample(Items: Pred.generate(Cur: Srcs, BaseTypes: KnownTypes));
199
200 // If we can find a pointer to load from, use it half the time.
201 Value *Ptr = findPointer(BB, Insts);
202 if (Ptr) {
203 // Create load from the chosen pointer
204 auto IP = BB.getFirstInsertionPt();
205 if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
206 IP = ++I->getIterator();
207 assert(IP != BB.end() && "guaranteed by the findPointer");
208 }
209 // Pick the type independently.
210 Type *AccessTy = RS.getSelection()->getType();
211 auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", IP);
212
213 // Only sample this load if it really matches the descriptor
214 if (Pred.matches(Cur: Srcs, New: NewLoad))
215 RS.sample(Item: NewLoad, Weight: RS.totalWeight());
216 else
217 NewLoad->eraseFromParent();
218 }
219
220 Value *newSrc = RS.getSelection();
221 // Generate a stack alloca and store the constant to it if constant is not
222 // allowed, our hope is that later mutations can generate some values and
223 // store to this placeholder.
224 if (!allowConstant && isa<Constant>(Val: newSrc)) {
225 Type *Ty = newSrc->getType();
226 Function *F = BB.getParent();
227 AllocaInst *Alloca = createStackMemory(F, Ty, Init: newSrc);
228 if (BB.getTerminator()) {
229 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L",
230 BB.getTerminator()->getIterator());
231 } else {
232 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
233 }
234 }
235 return newSrc;
236}
237
238static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
239 const Value *Replacement) {
240 unsigned int OperandNo = Operand.getOperandNo();
241 if (Operand->getType() != Replacement->getType())
242 return false;
243 switch (I->getOpcode()) {
244 case Instruction::GetElementPtr:
245 case Instruction::ExtractElement:
246 case Instruction::ExtractValue:
247 // TODO: We could potentially validate these, but for now just leave indices
248 // alone.
249 if (OperandNo >= 1)
250 return false;
251 break;
252 case Instruction::InsertValue:
253 case Instruction::InsertElement:
254 case Instruction::ShuffleVector:
255 if (OperandNo >= 2)
256 return false;
257 break;
258 // For Br/Switch, we only try to modify the 1st Operand (condition).
259 // Modify other operands, like switch case may accidently change case from
260 // ConstantInt to a register, which is illegal.
261 case Instruction::Switch:
262 case Instruction::Br:
263 if (OperandNo >= 1)
264 return false;
265 break;
266 case Instruction::Call:
267 case Instruction::Invoke:
268 case Instruction::CallBr: {
269 const Function *Callee = cast<CallBase>(Val: I)->getCalledFunction();
270 // If it's an indirect call, give up.
271 if (!Callee)
272 return false;
273 // If callee is not an intrinsic, operand 0 is the function to be called.
274 // Since we cannot assume that the replacement is a function pointer,
275 // we give up.
276 if (!Callee->getIntrinsicID() && OperandNo == 0)
277 return false;
278 return !Callee->hasParamAttribute(ArgNo: OperandNo, Kind: Attribute::ImmArg);
279 }
280 default:
281 break;
282 }
283 return true;
284}
285
286Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
287 ArrayRef<Instruction *> Insts,
288 Value *V) {
289 SmallVector<uint64_t, 8> SinkTys;
290 for (uint64_t i = 0; i < EndOfValueSink; i++)
291 SinkTys.push_back(Elt: i);
292 std::shuffle(first: SinkTys.begin(), last: SinkTys.end(), g&: Rand);
293 auto findSinkAndConnect =
294 [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
295 auto RS = makeSampler<Use *>(RandGen&: Rand);
296 for (auto &I : Instructions) {
297 for (Use &U : I->operands())
298 if (isCompatibleReplacement(I, Operand: U, Replacement: V))
299 RS.sample(Item: &U, Weight: 1);
300 }
301 if (!RS.isEmpty()) {
302 Use *Sink = RS.getSelection();
303 User *U = Sink->getUser();
304 unsigned OpNo = Sink->getOperandNo();
305 U->setOperand(i: OpNo, Val: V);
306 return cast<Instruction>(Val: U);
307 }
308 return nullptr;
309 };
310 Instruction *Sink = nullptr;
311 for (uint64_t SinkTy : SinkTys) {
312 switch (SinkTy) {
313 case SinkToInstInCurBlock:
314 Sink = findSinkAndConnect(Insts);
315 if (Sink)
316 return Sink;
317 break;
318 case PointersInDominator: {
319 auto Dominators = getDominators(BB: &BB);
320 std::shuffle(first: Dominators.begin(), last: Dominators.end(), g&: Rand);
321 for (BasicBlock *Dom : Dominators) {
322 for (Instruction &I : *Dom) {
323 if (isa<PointerType>(Val: I.getType()))
324 return new StoreInst(V, &I, Insts.back()->getIterator());
325 }
326 }
327 break;
328 }
329 case InstInDominatee: {
330 auto Dominatees = getDominatees(BB: &BB);
331 std::shuffle(first: Dominatees.begin(), last: Dominatees.end(), g&: Rand);
332 for (BasicBlock *Dominee : Dominatees) {
333 std::vector<Instruction *> Instructions;
334 for (Instruction &I : *Dominee)
335 Instructions.push_back(x: &I);
336 Sink = findSinkAndConnect(Instructions);
337 if (Sink) {
338 return Sink;
339 }
340 }
341 break;
342 }
343 case NewStore:
344 /// TODO: allocate a new stack memory.
345 return newSink(BB, Insts, V);
346 case SinkToGlobalVariable: {
347 Module *M = BB.getParent()->getParent();
348 auto [GV, DidCreate] =
349 findOrCreateGlobalVariable(M, Srcs: {}, Pred: fuzzerop::onlyType(Only: V->getType()));
350 return new StoreInst(V, GV, Insts.back()->getIterator());
351 }
352 case EndOfValueSink:
353 default:
354 llvm_unreachable("EndOfValueSink executed");
355 }
356 }
357 llvm_unreachable("Can't find a sink");
358}
359
360Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
361 ArrayRef<Instruction *> Insts, Value *V) {
362 Value *Ptr = findPointer(BB, Insts);
363 if (!Ptr) {
364 if (uniform(Gen&: Rand, Min: 0, Max: 1)) {
365 Type *Ty = V->getType();
366 Ptr = createStackMemory(F: BB.getParent(), Ty, Init: PoisonValue::get(T: Ty));
367 } else {
368 Ptr = PoisonValue::get(T: PointerType::get(C&: V->getContext(), AddressSpace: 0));
369 }
370 }
371
372 return new StoreInst(V, Ptr, Insts.back()->getIterator());
373}
374
375Value *RandomIRBuilder::findPointer(BasicBlock &BB,
376 ArrayRef<Instruction *> Insts) {
377 auto IsMatchingPtr = [](Instruction *Inst) {
378 // Invoke instructions sometimes produce valid pointers but currently
379 // we can't insert loads or stores from them
380 if (Inst->isTerminator())
381 return false;
382
383 return Inst->getType()->isPointerTy();
384 };
385 if (auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Insts, Pred: IsMatchingPtr)))
386 return RS.getSelection();
387 return nullptr;
388}
389
390Type *RandomIRBuilder::randomType() {
391 uint64_t TyIdx = uniform<uint64_t>(Gen&: Rand, Min: 0, Max: KnownTypes.size() - 1);
392 return KnownTypes[TyIdx];
393}
394
395Function *RandomIRBuilder::createFunctionDeclaration(Module &M,
396 uint64_t ArgNum) {
397 Type *RetType = randomType();
398
399 SmallVector<Type *, 2> Args;
400 for (uint64_t i = 0; i < ArgNum; i++) {
401 Args.push_back(Elt: randomType());
402 }
403
404 Function *F = Function::Create(Ty: FunctionType::get(Result: RetType, Params: Args,
405 /*isVarArg=*/false),
406 Linkage: GlobalValue::ExternalLinkage, N: "f", M: &M);
407 return F;
408}
409Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {
410 return createFunctionDeclaration(
411 M, ArgNum: uniform<uint64_t>(Gen&: Rand, Min: MinArgNum, Max: MaxArgNum));
412}
413
414Function *RandomIRBuilder::createFunctionDefinition(Module &M,
415 uint64_t ArgNum) {
416 Function *F = this->createFunctionDeclaration(M, ArgNum);
417
418 // TODO: Some arguments and a return value would probably be more
419 // interesting.
420 LLVMContext &Context = M.getContext();
421 const DataLayout &DL = M.getDataLayout();
422 BasicBlock *BB = BasicBlock::Create(Context, Name: "BB", Parent: F);
423 Type *RetTy = F->getReturnType();
424 if (RetTy != Type::getVoidTy(C&: Context)) {
425 Instruction *RetAlloca =
426 new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
427 Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
428 ReturnInst::Create(C&: Context, retVal: RetLoad, InsertBefore: BB);
429 } else {
430 ReturnInst::Create(C&: Context, InsertAtEnd: BB);
431 }
432
433 return F;
434}
435Function *RandomIRBuilder::createFunctionDefinition(Module &M) {
436 return createFunctionDefinition(
437 M, ArgNum: uniform<uint64_t>(Gen&: Rand, Min: MinArgNum, Max: MaxArgNum));
438}
439