1//===-- RandomIRBuilder.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/FuzzMutate/RandomIRBuilder.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/FuzzMutate/OpDescriptor.h"
12#include "llvm/FuzzMutate/Random.h"
13#include "llvm/IR/BasicBlock.h"
14#include "llvm/IR/Constants.h"
15#include "llvm/IR/DataLayout.h"
16#include "llvm/IR/Dominators.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/Instructions.h"
19#include "llvm/IR/Module.h"
20
21using namespace llvm;
22using namespace fuzzerop;
23
24static DominatorTree getDomTree(Function &F) {
25 // Dominator tree construction requires that all blocks have terminators.
26 SmallVector<Instruction *> AddedInsts;
27 for (BasicBlock &BB : F)
28 if (!BB.getTerminator())
29 AddedInsts.push_back(Elt: new UnreachableInst(F.getContext(), &BB));
30 DominatorTree DT(F);
31 for (Instruction *I : AddedInsts)
32 I->eraseFromParent();
33 return DT;
34}
35
36/// Return a vector of Blocks that dominates this block, excluding current
37/// block.
38static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
39 std::vector<BasicBlock *> ret;
40 DominatorTree DT = getDomTree(F&: *BB->getParent());
41 DomTreeNode *Node = DT.getNode(BB);
42 // It's possible that an orphan block is not in the dom tree. In that case we
43 // just return nothing.
44 if (!Node)
45 return ret;
46 Node = Node->getIDom();
47 while (Node && Node->getBlock()) {
48 ret.push_back(x: Node->getBlock());
49 // Get parent block.
50 Node = Node->getIDom();
51 }
52 return ret;
53}
54
55/// Return a vector of Blocks that is dominated by this block, excluding current
56/// block
57static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
58 DominatorTree DT = getDomTree(F&: *BB->getParent());
59 std::vector<BasicBlock *> ret;
60 DomTreeNode *Parent = DT.getNode(BB);
61 // It's possible that an orphan block is not in the dom tree. In that case we
62 // just return nothing.
63 if (!Parent)
64 return ret;
65 for (DomTreeNode *Child : Parent->children())
66 ret.push_back(x: Child->getBlock());
67 uint64_t Idx = 0;
68 while (Idx < ret.size()) {
69 DomTreeNode *Node = DT[ret[Idx]];
70 Idx++;
71 for (DomTreeNode *Child : Node->children())
72 ret.push_back(x: Child->getBlock());
73 }
74 return ret;
75}
76
77AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
78 Value *Init) {
79 /// TODO: For all Allocas, maybe allocate an array.
80 BasicBlock *EntryBB = &F->getEntryBlock();
81 const DataLayout &DL = F->getDataLayout();
82 AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
83 EntryBB->getFirstInsertionPt());
84 if (Init)
85 new StoreInst(Init, Alloca, std::next(x: Alloca->getIterator()));
86 return Alloca;
87}
88
89std::pair<GlobalVariable *, bool>
90RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
91 fuzzerop::SourcePred Pred) {
92 auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
93 // Can't directly compare GV's type, as it would be a pointer to the actual
94 // type.
95 return Pred.matches(Cur: Srcs, New: PoisonValue::get(T: GV->getValueType()));
96 };
97 bool DidCreate = false;
98 SmallVector<GlobalVariable *, 4> GlobalVars(
99 llvm::make_pointer_range(Range: M->globals()));
100 auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: GlobalVars, Pred: MatchesPred));
101 RS.sample(Item: nullptr, Weight: 1);
102 GlobalVariable *GV = RS.getSelection();
103 if (!GV) {
104 DidCreate = true;
105 using LinkageTypes = GlobalVariable::LinkageTypes;
106 auto TRS = makeSampler<Constant *>(RandGen&: Rand);
107 TRS.sample(Items: Pred.generate(Cur: Srcs, BaseTypes: KnownTypes));
108 Constant *Init = TRS.getSelection();
109 Type *Ty = Init->getType();
110 GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
111 "G", nullptr,
112 GlobalValue::ThreadLocalMode::NotThreadLocal,
113 M->getDataLayout().getDefaultGlobalsAddressSpace());
114 }
115 return {GV, DidCreate};
116}
117
118Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
119 ArrayRef<Instruction *> Insts) {
120 return findOrCreateSource(BB, Insts, Srcs: {}, Pred: anyType());
121}
122
123// Adapts the current pointer for a legal mem operation on the target arch.
124static Value *buildTargetLegalPtr(Module *M, Value *Ptr, InsertPosition IP,
125 const Twine &Name,
126 SmallVector<Instruction *> *NewInsts) {
127 if (M && M->getTargetTriple().isAMDGCN()) {
128 // Check if we should perform an address space cast
129 PointerType *pointerType = dyn_cast<PointerType>(Val: Ptr->getType());
130 if (pointerType && pointerType->getAddressSpace() == 8) {
131 // Perform address space cast from address space 8 to address space 7
132 auto NewPtr = new AddrSpaceCastInst(
133 Ptr, PointerType::get(C&: M->getContext(), AddressSpace: 7), Name + ".ASC", IP);
134 if (NewInsts)
135 NewInsts->push_back(Elt: NewPtr);
136 return NewPtr;
137 }
138 }
139
140 return Ptr;
141}
142
143// Stores a value to memory, considering the target triple's restrictions.
144static Instruction *buildTargetLegalStore(Value *Val, Value *Ptr,
145 InsertPosition IP, Module *M) {
146 Value *StorePtr = buildTargetLegalPtr(M, Ptr, IP, Name: "", NewInsts: nullptr);
147 Instruction *Store = new StoreInst(Val, StorePtr, IP);
148 return Store;
149}
150
151// Loads a value from memory, considering the target triple's restrictions.
152static std::pair<Instruction *, SmallVector<Instruction *>>
153buildTargetLegalLoad(Type *AccessTy, Value *Ptr, InsertPosition IP, Module *M,
154 const Twine &LoadName) {
155 SmallVector<Instruction *> NewInsts;
156
157 Value *LoadPtr = buildTargetLegalPtr(M, Ptr, IP, Name: LoadName, NewInsts: &NewInsts);
158
159 Instruction *Load = new LoadInst(AccessTy, LoadPtr, LoadName, IP);
160 NewInsts.push_back(Elt: Load);
161
162 return std::make_pair(x&: Load, y&: NewInsts);
163}
164
165static void eraseNewInstructions(SmallVector<Instruction *> &NewInsts) {
166 // Remove in reverse order (uses before defs)
167 for (auto it = NewInsts.rbegin(); it != NewInsts.rend(); ++it) {
168 (*it)->eraseFromParent();
169 }
170}
171
172Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
173 ArrayRef<Instruction *> Insts,
174 ArrayRef<Value *> Srcs,
175 SourcePred Pred,
176 bool allowConstant) {
177 auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Cur: Srcs, New: V); };
178 SmallVector<uint64_t, 8> SrcTys;
179 for (uint64_t i = 0; i < EndOfValueSource; i++)
180 SrcTys.push_back(Elt: i);
181 std::shuffle(first: SrcTys.begin(), last: SrcTys.end(), g&: Rand);
182 for (uint64_t SrcTy : SrcTys) {
183 switch (SrcTy) {
184 case SrcFromInstInCurBlock: {
185 auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Insts, Pred: MatchesPred));
186 if (!RS.isEmpty()) {
187 return RS.getSelection();
188 }
189 break;
190 }
191 case FunctionArgument: {
192 Function *F = BB.getParent();
193 SmallVector<Argument *, 8> Args;
194 for (uint64_t i = 0; i < F->arg_size(); i++) {
195 Args.push_back(Elt: F->getArg(i));
196 }
197 auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Args, Pred: MatchesPred));
198 if (!RS.isEmpty()) {
199 return RS.getSelection();
200 }
201 break;
202 }
203 case InstInDominator: {
204 auto Dominators = getDominators(BB: &BB);
205 std::shuffle(first: Dominators.begin(), last: Dominators.end(), g&: Rand);
206 for (BasicBlock *Dom : Dominators) {
207 SmallVector<Instruction *, 16> Instructions(
208 llvm::make_pointer_range(Range&: *Dom));
209 auto RS =
210 makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Instructions, Pred: MatchesPred));
211 // Also consider choosing no source, meaning we want a new one.
212 if (!RS.isEmpty()) {
213 return RS.getSelection();
214 }
215 }
216 break;
217 }
218 case SrcFromGlobalVariable: {
219 Module *M = BB.getParent()->getParent();
220 auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
221 Type *Ty = GV->getValueType();
222 InsertPosition IP = BB.getTerminator()
223 ? InsertPosition(BB.getFirstInsertionPt())
224 : InsertPosition(&BB);
225 // Build a legal load and track new instructions in case a rollback is
226 // needed.
227 auto [LoadGV, NewInsts] = buildTargetLegalLoad(AccessTy: Ty, Ptr: GV, IP, M, LoadName: "LGV");
228 // Because we might be generating new values, we have to check if it
229 // matches again.
230 if (DidCreate) {
231 if (Pred.matches(Cur: Srcs, New: LoadGV)) {
232 return LoadGV;
233 }
234 // Remove newly inserted instructions
235 eraseNewInstructions(NewInsts);
236 // If no one is using this GlobalVariable, delete it too.
237 if (GV->use_empty()) {
238 GV->eraseFromParent();
239 }
240 }
241 break;
242 }
243 case NewConstOrStack: {
244 return newSource(BB, Insts, Srcs, Pred, allowConstant);
245 }
246 default:
247 case EndOfValueSource: {
248 llvm_unreachable("EndOfValueSource executed");
249 }
250 }
251 }
252 llvm_unreachable("Can't find a source");
253}
254
255Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
256 ArrayRef<Value *> Srcs, SourcePred Pred,
257 bool allowConstant) {
258 // Generate some constants to choose from.
259 auto RS = makeSampler<Value *>(RandGen&: Rand);
260 RS.sample(Items: Pred.generate(Cur: Srcs, BaseTypes: KnownTypes));
261
262 // If we can find a pointer to load from, use it half the time.
263 Value *Ptr = findPointer(BB, Insts);
264 if (Ptr) {
265 // Create load from the chosen pointer
266 auto IP = BB.getFirstInsertionPt();
267 if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
268 IP = ++I->getIterator();
269 assert(IP != BB.end() && "guaranteed by the findPointer");
270 }
271 // Pick the type independently.
272 Type *AccessTy = RS.getSelection()->getType();
273 // Build a legal load and track new instructions in case a rollback is
274 // needed.
275 auto [NewLoad, NewInsts] =
276 buildTargetLegalLoad(AccessTy, Ptr, IP, M: BB.getModule(), LoadName: "L");
277
278 // Only sample this load if it really matches the descriptor
279 if (Pred.matches(Cur: Srcs, New: NewLoad))
280 RS.sample(Item: NewLoad, Weight: RS.totalWeight());
281 else {
282 // Remove newly inserted instructions
283 eraseNewInstructions(NewInsts);
284 }
285 }
286
287 Value *newSrc = RS.getSelection();
288 // Generate a stack alloca and store the constant to it if constant is not
289 // allowed, our hope is that later mutations can generate some values and
290 // store to this placeholder.
291 if (!allowConstant && isa<Constant>(Val: newSrc)) {
292 Type *Ty = newSrc->getType();
293 Function *F = BB.getParent();
294 AllocaInst *Alloca = createStackMemory(F, Ty, Init: newSrc);
295 if (BB.getTerminator()) {
296 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L",
297 BB.getTerminator()->getIterator());
298 } else {
299 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
300 }
301 }
302 return newSrc;
303}
304
305static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
306 const Value *Replacement) {
307 unsigned int OperandNo = Operand.getOperandNo();
308 if (Operand->getType() != Replacement->getType())
309 return false;
310 switch (I->getOpcode()) {
311 case Instruction::GetElementPtr:
312 case Instruction::ExtractElement:
313 case Instruction::ExtractValue:
314 // TODO: We could potentially validate these, but for now just leave indices
315 // alone.
316 if (OperandNo >= 1)
317 return false;
318 break;
319 case Instruction::InsertValue:
320 case Instruction::InsertElement:
321 case Instruction::ShuffleVector:
322 if (OperandNo >= 2)
323 return false;
324 break;
325 // For Br/Switch, we only try to modify the 1st Operand (condition).
326 // Modify other operands, like switch case may accidently change case from
327 // ConstantInt to a register, which is illegal.
328 case Instruction::Switch:
329 case Instruction::CondBr:
330 if (OperandNo >= 1)
331 return false;
332 break;
333 case Instruction::Call:
334 case Instruction::Invoke:
335 case Instruction::CallBr: {
336 const Function *Callee = cast<CallBase>(Val: I)->getCalledFunction();
337 // If it's an indirect call, give up.
338 if (!Callee)
339 return false;
340 // If callee is not an intrinsic, operand 0 is the function to be called.
341 // Since we cannot assume that the replacement is a function pointer,
342 // we give up.
343 if (!Callee->getIntrinsicID() && OperandNo == 0)
344 return false;
345 return !Callee->hasParamAttribute(ArgNo: OperandNo, Kind: Attribute::ImmArg);
346 }
347 default:
348 break;
349 }
350 return true;
351}
352
353Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
354 ArrayRef<Instruction *> Insts,
355 Value *V) {
356 SmallVector<uint64_t, 8> SinkTys;
357 for (uint64_t i = 0; i < EndOfValueSink; i++)
358 SinkTys.push_back(Elt: i);
359 std::shuffle(first: SinkTys.begin(), last: SinkTys.end(), g&: Rand);
360 auto findSinkAndConnect =
361 [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
362 auto RS = makeSampler<Use *>(RandGen&: Rand);
363 for (auto &I : Instructions) {
364 for (Use &U : I->operands())
365 if (isCompatibleReplacement(I, Operand: U, Replacement: V))
366 RS.sample(Item: &U, Weight: 1);
367 }
368 if (!RS.isEmpty()) {
369 Use *Sink = RS.getSelection();
370 User *U = Sink->getUser();
371 unsigned OpNo = Sink->getOperandNo();
372 U->setOperand(i: OpNo, Val: V);
373 return cast<Instruction>(Val: U);
374 }
375 return nullptr;
376 };
377 Instruction *Sink = nullptr;
378 for (uint64_t SinkTy : SinkTys) {
379 switch (SinkTy) {
380 case SinkToInstInCurBlock:
381 Sink = findSinkAndConnect(Insts);
382 if (Sink)
383 return Sink;
384 break;
385 case PointersInDominator: {
386 auto Dominators = getDominators(BB: &BB);
387 std::shuffle(first: Dominators.begin(), last: Dominators.end(), g&: Rand);
388 for (BasicBlock *Dom : Dominators) {
389 for (Instruction &I : *Dom) {
390 if (isa<PointerType>(Val: I.getType())) {
391 return buildTargetLegalStore(Val: V, Ptr: &I, IP: Insts.back()->getIterator(),
392 M: I.getModule());
393 }
394 }
395 }
396 break;
397 }
398 case InstInDominatee: {
399 auto Dominatees = getDominatees(BB: &BB);
400 std::shuffle(first: Dominatees.begin(), last: Dominatees.end(), g&: Rand);
401 for (BasicBlock *Dominee : Dominatees) {
402 std::vector<Instruction *> Instructions;
403 for (Instruction &I : *Dominee)
404 Instructions.push_back(x: &I);
405 Sink = findSinkAndConnect(Instructions);
406 if (Sink) {
407 return Sink;
408 }
409 }
410 break;
411 }
412 case NewStore:
413 /// TODO: allocate a new stack memory.
414 return newSink(BB, Insts, V);
415 case SinkToGlobalVariable: {
416 Module *M = BB.getModule();
417 auto [GV, DidCreate] =
418 findOrCreateGlobalVariable(M, Srcs: {}, Pred: fuzzerop::onlyType(Only: V->getType()));
419 return buildTargetLegalStore(Val: V, Ptr: GV, IP: Insts.back()->getIterator(), M);
420 }
421 case EndOfValueSink:
422 default:
423 llvm_unreachable("EndOfValueSink executed");
424 }
425 }
426 llvm_unreachable("Can't find a sink");
427}
428
429Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
430 ArrayRef<Instruction *> Insts, Value *V) {
431 Value *Ptr = findPointer(BB, Insts);
432 if (!Ptr) {
433 if (uniform(Gen&: Rand, Min: 0, Max: 1)) {
434 Type *Ty = V->getType();
435 Ptr = createStackMemory(F: BB.getParent(), Ty, Init: PoisonValue::get(T: Ty));
436 } else {
437 Ptr = PoisonValue::get(T: PointerType::get(C&: V->getContext(), AddressSpace: 0));
438 }
439 }
440
441 return buildTargetLegalStore(Val: V, Ptr, IP: Insts.back()->getIterator(),
442 M: BB.getModule());
443}
444
445Value *RandomIRBuilder::findPointer(BasicBlock &BB,
446 ArrayRef<Instruction *> Insts) {
447 auto IsMatchingPtr = [](Instruction *Inst) {
448 // Invoke instructions sometimes produce valid pointers but currently
449 // we can't insert loads or stores from them
450 if (Inst->isTerminator())
451 return false;
452
453 return Inst->getType()->isPointerTy();
454 };
455 if (auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Insts, Pred: IsMatchingPtr)))
456 return RS.getSelection();
457 return nullptr;
458}
459
460Type *RandomIRBuilder::randomType() {
461 uint64_t TyIdx = uniform<uint64_t>(Gen&: Rand, Min: 0, Max: KnownTypes.size() - 1);
462 return KnownTypes[TyIdx];
463}
464
465Function *RandomIRBuilder::createFunctionDeclaration(Module &M,
466 uint64_t ArgNum) {
467 Type *RetType = randomType();
468
469 SmallVector<Type *, 2> Args;
470 for (uint64_t i = 0; i < ArgNum; i++) {
471 Args.push_back(Elt: randomType());
472 }
473
474 Function *F = Function::Create(Ty: FunctionType::get(Result: RetType, Params: Args,
475 /*isVarArg=*/false),
476 Linkage: GlobalValue::ExternalLinkage, N: "f", M: &M);
477 return F;
478}
479Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {
480 return createFunctionDeclaration(
481 M, ArgNum: uniform<uint64_t>(Gen&: Rand, Min: MinArgNum, Max: MaxArgNum));
482}
483
484Function *RandomIRBuilder::createFunctionDefinition(Module &M,
485 uint64_t ArgNum) {
486 Function *F = this->createFunctionDeclaration(M, ArgNum);
487
488 // TODO: Some arguments and a return value would probably be more
489 // interesting.
490 LLVMContext &Context = M.getContext();
491 const DataLayout &DL = M.getDataLayout();
492 BasicBlock *BB = BasicBlock::Create(Context, Name: "BB", Parent: F);
493 Type *RetTy = F->getReturnType();
494 if (RetTy != Type::getVoidTy(C&: Context)) {
495 Instruction *RetAlloca =
496 new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
497 Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
498 ReturnInst::Create(C&: Context, retVal: RetLoad, InsertBefore: BB);
499 } else {
500 ReturnInst::Create(C&: Context, InsertAtEnd: BB);
501 }
502
503 return F;
504}
505Function *RandomIRBuilder::createFunctionDefinition(Module &M) {
506 return createFunctionDefinition(
507 M, ArgNum: uniform<uint64_t>(Gen&: Rand, Min: MinArgNum, Max: MaxArgNum));
508}
509