1//===-- RandomIRBuilder.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/FuzzMutate/RandomIRBuilder.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/FuzzMutate/OpDescriptor.h"
12#include "llvm/FuzzMutate/Random.h"
13#include "llvm/IR/BasicBlock.h"
14#include "llvm/IR/Constants.h"
15#include "llvm/IR/DataLayout.h"
16#include "llvm/IR/Dominators.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/Instructions.h"
19#include "llvm/IR/Module.h"
20
21using namespace llvm;
22using namespace fuzzerop;
23
24/// Return a vector of Blocks that dominates this block, excluding current
25/// block.
26static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
27 std::vector<BasicBlock *> ret;
28 DominatorTree DT(*BB->getParent());
29 DomTreeNode *Node = DT.getNode(BB);
30 // It's possible that an orphan block is not in the dom tree. In that case we
31 // just return nothing.
32 if (!Node)
33 return ret;
34 Node = Node->getIDom();
35 while (Node && Node->getBlock()) {
36 ret.push_back(x: Node->getBlock());
37 // Get parent block.
38 Node = Node->getIDom();
39 }
40 return ret;
41}
42
43/// Return a vector of Blocks that is dominated by this block, excluding current
44/// block
45static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
46 DominatorTree DT(*BB->getParent());
47 std::vector<BasicBlock *> ret;
48 DomTreeNode *Parent = DT.getNode(BB);
49 // It's possible that an orphan block is not in the dom tree. In that case we
50 // just return nothing.
51 if (!Parent)
52 return ret;
53 for (DomTreeNode *Child : Parent->children())
54 ret.push_back(x: Child->getBlock());
55 uint64_t Idx = 0;
56 while (Idx < ret.size()) {
57 DomTreeNode *Node = DT[ret[Idx]];
58 Idx++;
59 for (DomTreeNode *Child : Node->children())
60 ret.push_back(x: Child->getBlock());
61 }
62 return ret;
63}
64
65AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
66 Value *Init) {
67 /// TODO: For all Allocas, maybe allocate an array.
68 BasicBlock *EntryBB = &F->getEntryBlock();
69 const DataLayout &DL = F->getDataLayout();
70 AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
71 EntryBB->getFirstInsertionPt());
72 if (Init)
73 new StoreInst(Init, Alloca, std::next(x: Alloca->getIterator()));
74 return Alloca;
75}
76
77std::pair<GlobalVariable *, bool>
78RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
79 fuzzerop::SourcePred Pred) {
80 auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
81 // Can't directly compare GV's type, as it would be a pointer to the actual
82 // type.
83 return Pred.matches(Cur: Srcs, New: PoisonValue::get(T: GV->getValueType()));
84 };
85 bool DidCreate = false;
86 SmallVector<GlobalVariable *, 4> GlobalVars(
87 llvm::make_pointer_range(Range: M->globals()));
88 auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: GlobalVars, Pred: MatchesPred));
89 RS.sample(Item: nullptr, Weight: 1);
90 GlobalVariable *GV = RS.getSelection();
91 if (!GV) {
92 DidCreate = true;
93 using LinkageTypes = GlobalVariable::LinkageTypes;
94 auto TRS = makeSampler<Constant *>(RandGen&: Rand);
95 TRS.sample(Items: Pred.generate(Cur: Srcs, BaseTypes: KnownTypes));
96 Constant *Init = TRS.getSelection();
97 Type *Ty = Init->getType();
98 GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
99 "G", nullptr,
100 GlobalValue::ThreadLocalMode::NotThreadLocal,
101 M->getDataLayout().getDefaultGlobalsAddressSpace());
102 }
103 return {GV, DidCreate};
104}
105
106Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
107 ArrayRef<Instruction *> Insts) {
108 return findOrCreateSource(BB, Insts, Srcs: {}, Pred: anyType());
109}
110
111// Adapts the current pointer for a legal mem operation on the target arch.
112static Value *buildTargetLegalPtr(Module *M, Value *Ptr, InsertPosition IP,
113 const Twine &Name,
114 SmallVector<Instruction *> *NewInsts) {
115 if (M && M->getTargetTriple().isAMDGCN()) {
116 // Check if we should perform an address space cast
117 PointerType *pointerType = dyn_cast<PointerType>(Val: Ptr->getType());
118 if (pointerType && pointerType->getAddressSpace() == 8) {
119 // Perform address space cast from address space 8 to address space 7
120 auto NewPtr = new AddrSpaceCastInst(
121 Ptr, PointerType::get(C&: M->getContext(), AddressSpace: 7), Name + ".ASC", IP);
122 if (NewInsts)
123 NewInsts->push_back(Elt: NewPtr);
124 return NewPtr;
125 }
126 }
127
128 return Ptr;
129}
130
131// Stores a value to memory, considering the target triple's restrictions.
132static Instruction *buildTargetLegalStore(Value *Val, Value *Ptr,
133 InsertPosition IP, Module *M) {
134 Value *StorePtr = buildTargetLegalPtr(M, Ptr, IP, Name: "", NewInsts: nullptr);
135 Instruction *Store = new StoreInst(Val, StorePtr, IP);
136 return Store;
137}
138
139// Loads a value from memory, considering the target triple's restrictions.
140static std::pair<Instruction *, SmallVector<Instruction *>>
141buildTargetLegalLoad(Type *AccessTy, Value *Ptr, InsertPosition IP, Module *M,
142 const Twine &LoadName) {
143 SmallVector<Instruction *> NewInsts;
144
145 Value *LoadPtr = buildTargetLegalPtr(M, Ptr, IP, Name: LoadName, NewInsts: &NewInsts);
146
147 Instruction *Load = new LoadInst(AccessTy, LoadPtr, LoadName, IP);
148 NewInsts.push_back(Elt: Load);
149
150 return std::make_pair(x&: Load, y&: NewInsts);
151}
152
153static void eraseNewInstructions(SmallVector<Instruction *> &NewInsts) {
154 // Remove in reverse order (uses before defs)
155 for (auto it = NewInsts.rbegin(); it != NewInsts.rend(); ++it) {
156 (*it)->eraseFromParent();
157 }
158}
159
160Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
161 ArrayRef<Instruction *> Insts,
162 ArrayRef<Value *> Srcs,
163 SourcePred Pred,
164 bool allowConstant) {
165 auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Cur: Srcs, New: V); };
166 SmallVector<uint64_t, 8> SrcTys;
167 for (uint64_t i = 0; i < EndOfValueSource; i++)
168 SrcTys.push_back(Elt: i);
169 std::shuffle(first: SrcTys.begin(), last: SrcTys.end(), g&: Rand);
170 for (uint64_t SrcTy : SrcTys) {
171 switch (SrcTy) {
172 case SrcFromInstInCurBlock: {
173 auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Insts, Pred: MatchesPred));
174 if (!RS.isEmpty()) {
175 return RS.getSelection();
176 }
177 break;
178 }
179 case FunctionArgument: {
180 Function *F = BB.getParent();
181 SmallVector<Argument *, 8> Args;
182 for (uint64_t i = 0; i < F->arg_size(); i++) {
183 Args.push_back(Elt: F->getArg(i));
184 }
185 auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Args, Pred: MatchesPred));
186 if (!RS.isEmpty()) {
187 return RS.getSelection();
188 }
189 break;
190 }
191 case InstInDominator: {
192 auto Dominators = getDominators(BB: &BB);
193 std::shuffle(first: Dominators.begin(), last: Dominators.end(), g&: Rand);
194 for (BasicBlock *Dom : Dominators) {
195 SmallVector<Instruction *, 16> Instructions(
196 llvm::make_pointer_range(Range&: *Dom));
197 auto RS =
198 makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Instructions, Pred: MatchesPred));
199 // Also consider choosing no source, meaning we want a new one.
200 if (!RS.isEmpty()) {
201 return RS.getSelection();
202 }
203 }
204 break;
205 }
206 case SrcFromGlobalVariable: {
207 Module *M = BB.getParent()->getParent();
208 auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
209 Type *Ty = GV->getValueType();
210 InsertPosition IP = BB.getTerminator()
211 ? InsertPosition(BB.getFirstInsertionPt())
212 : InsertPosition(&BB);
213 // Build a legal load and track new instructions in case a rollback is
214 // needed.
215 auto [LoadGV, NewInsts] = buildTargetLegalLoad(AccessTy: Ty, Ptr: GV, IP, M, LoadName: "LGV");
216 // Because we might be generating new values, we have to check if it
217 // matches again.
218 if (DidCreate) {
219 if (Pred.matches(Cur: Srcs, New: LoadGV)) {
220 return LoadGV;
221 }
222 // Remove newly inserted instructions
223 eraseNewInstructions(NewInsts);
224 // If no one is using this GlobalVariable, delete it too.
225 if (GV->use_empty()) {
226 GV->eraseFromParent();
227 }
228 }
229 break;
230 }
231 case NewConstOrStack: {
232 return newSource(BB, Insts, Srcs, Pred, allowConstant);
233 }
234 default:
235 case EndOfValueSource: {
236 llvm_unreachable("EndOfValueSource executed");
237 }
238 }
239 }
240 llvm_unreachable("Can't find a source");
241}
242
243Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
244 ArrayRef<Value *> Srcs, SourcePred Pred,
245 bool allowConstant) {
246 // Generate some constants to choose from.
247 auto RS = makeSampler<Value *>(RandGen&: Rand);
248 RS.sample(Items: Pred.generate(Cur: Srcs, BaseTypes: KnownTypes));
249
250 // If we can find a pointer to load from, use it half the time.
251 Value *Ptr = findPointer(BB, Insts);
252 if (Ptr) {
253 // Create load from the chosen pointer
254 auto IP = BB.getFirstInsertionPt();
255 if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
256 IP = ++I->getIterator();
257 assert(IP != BB.end() && "guaranteed by the findPointer");
258 }
259 // Pick the type independently.
260 Type *AccessTy = RS.getSelection()->getType();
261 // Build a legal load and track new instructions in case a rollback is
262 // needed.
263 auto [NewLoad, NewInsts] =
264 buildTargetLegalLoad(AccessTy, Ptr, IP, M: BB.getModule(), LoadName: "L");
265
266 // Only sample this load if it really matches the descriptor
267 if (Pred.matches(Cur: Srcs, New: NewLoad))
268 RS.sample(Item: NewLoad, Weight: RS.totalWeight());
269 else {
270 // Remove newly inserted instructions
271 eraseNewInstructions(NewInsts);
272 }
273 }
274
275 Value *newSrc = RS.getSelection();
276 // Generate a stack alloca and store the constant to it if constant is not
277 // allowed, our hope is that later mutations can generate some values and
278 // store to this placeholder.
279 if (!allowConstant && isa<Constant>(Val: newSrc)) {
280 Type *Ty = newSrc->getType();
281 Function *F = BB.getParent();
282 AllocaInst *Alloca = createStackMemory(F, Ty, Init: newSrc);
283 if (BB.getTerminator()) {
284 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L",
285 BB.getTerminator()->getIterator());
286 } else {
287 newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);
288 }
289 }
290 return newSrc;
291}
292
293static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
294 const Value *Replacement) {
295 unsigned int OperandNo = Operand.getOperandNo();
296 if (Operand->getType() != Replacement->getType())
297 return false;
298 switch (I->getOpcode()) {
299 case Instruction::GetElementPtr:
300 case Instruction::ExtractElement:
301 case Instruction::ExtractValue:
302 // TODO: We could potentially validate these, but for now just leave indices
303 // alone.
304 if (OperandNo >= 1)
305 return false;
306 break;
307 case Instruction::InsertValue:
308 case Instruction::InsertElement:
309 case Instruction::ShuffleVector:
310 if (OperandNo >= 2)
311 return false;
312 break;
313 // For Br/Switch, we only try to modify the 1st Operand (condition).
314 // Modify other operands, like switch case may accidently change case from
315 // ConstantInt to a register, which is illegal.
316 case Instruction::Switch:
317 case Instruction::Br:
318 if (OperandNo >= 1)
319 return false;
320 break;
321 case Instruction::Call:
322 case Instruction::Invoke:
323 case Instruction::CallBr: {
324 const Function *Callee = cast<CallBase>(Val: I)->getCalledFunction();
325 // If it's an indirect call, give up.
326 if (!Callee)
327 return false;
328 // If callee is not an intrinsic, operand 0 is the function to be called.
329 // Since we cannot assume that the replacement is a function pointer,
330 // we give up.
331 if (!Callee->getIntrinsicID() && OperandNo == 0)
332 return false;
333 return !Callee->hasParamAttribute(ArgNo: OperandNo, Kind: Attribute::ImmArg);
334 }
335 default:
336 break;
337 }
338 return true;
339}
340
341Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
342 ArrayRef<Instruction *> Insts,
343 Value *V) {
344 SmallVector<uint64_t, 8> SinkTys;
345 for (uint64_t i = 0; i < EndOfValueSink; i++)
346 SinkTys.push_back(Elt: i);
347 std::shuffle(first: SinkTys.begin(), last: SinkTys.end(), g&: Rand);
348 auto findSinkAndConnect =
349 [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
350 auto RS = makeSampler<Use *>(RandGen&: Rand);
351 for (auto &I : Instructions) {
352 for (Use &U : I->operands())
353 if (isCompatibleReplacement(I, Operand: U, Replacement: V))
354 RS.sample(Item: &U, Weight: 1);
355 }
356 if (!RS.isEmpty()) {
357 Use *Sink = RS.getSelection();
358 User *U = Sink->getUser();
359 unsigned OpNo = Sink->getOperandNo();
360 U->setOperand(i: OpNo, Val: V);
361 return cast<Instruction>(Val: U);
362 }
363 return nullptr;
364 };
365 Instruction *Sink = nullptr;
366 for (uint64_t SinkTy : SinkTys) {
367 switch (SinkTy) {
368 case SinkToInstInCurBlock:
369 Sink = findSinkAndConnect(Insts);
370 if (Sink)
371 return Sink;
372 break;
373 case PointersInDominator: {
374 auto Dominators = getDominators(BB: &BB);
375 std::shuffle(first: Dominators.begin(), last: Dominators.end(), g&: Rand);
376 for (BasicBlock *Dom : Dominators) {
377 for (Instruction &I : *Dom) {
378 if (isa<PointerType>(Val: I.getType())) {
379 return buildTargetLegalStore(Val: V, Ptr: &I, IP: Insts.back()->getIterator(),
380 M: I.getModule());
381 }
382 }
383 }
384 break;
385 }
386 case InstInDominatee: {
387 auto Dominatees = getDominatees(BB: &BB);
388 std::shuffle(first: Dominatees.begin(), last: Dominatees.end(), g&: Rand);
389 for (BasicBlock *Dominee : Dominatees) {
390 std::vector<Instruction *> Instructions;
391 for (Instruction &I : *Dominee)
392 Instructions.push_back(x: &I);
393 Sink = findSinkAndConnect(Instructions);
394 if (Sink) {
395 return Sink;
396 }
397 }
398 break;
399 }
400 case NewStore:
401 /// TODO: allocate a new stack memory.
402 return newSink(BB, Insts, V);
403 case SinkToGlobalVariable: {
404 Module *M = BB.getModule();
405 auto [GV, DidCreate] =
406 findOrCreateGlobalVariable(M, Srcs: {}, Pred: fuzzerop::onlyType(Only: V->getType()));
407 return buildTargetLegalStore(Val: V, Ptr: GV, IP: Insts.back()->getIterator(), M);
408 }
409 case EndOfValueSink:
410 default:
411 llvm_unreachable("EndOfValueSink executed");
412 }
413 }
414 llvm_unreachable("Can't find a sink");
415}
416
417Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
418 ArrayRef<Instruction *> Insts, Value *V) {
419 Value *Ptr = findPointer(BB, Insts);
420 if (!Ptr) {
421 if (uniform(Gen&: Rand, Min: 0, Max: 1)) {
422 Type *Ty = V->getType();
423 Ptr = createStackMemory(F: BB.getParent(), Ty, Init: PoisonValue::get(T: Ty));
424 } else {
425 Ptr = PoisonValue::get(T: PointerType::get(C&: V->getContext(), AddressSpace: 0));
426 }
427 }
428
429 return buildTargetLegalStore(Val: V, Ptr, IP: Insts.back()->getIterator(),
430 M: BB.getModule());
431}
432
433Value *RandomIRBuilder::findPointer(BasicBlock &BB,
434 ArrayRef<Instruction *> Insts) {
435 auto IsMatchingPtr = [](Instruction *Inst) {
436 // Invoke instructions sometimes produce valid pointers but currently
437 // we can't insert loads or stores from them
438 if (Inst->isTerminator())
439 return false;
440
441 return Inst->getType()->isPointerTy();
442 };
443 if (auto RS = makeSampler(RandGen&: Rand, Items: make_filter_range(Range&: Insts, Pred: IsMatchingPtr)))
444 return RS.getSelection();
445 return nullptr;
446}
447
448Type *RandomIRBuilder::randomType() {
449 uint64_t TyIdx = uniform<uint64_t>(Gen&: Rand, Min: 0, Max: KnownTypes.size() - 1);
450 return KnownTypes[TyIdx];
451}
452
453Function *RandomIRBuilder::createFunctionDeclaration(Module &M,
454 uint64_t ArgNum) {
455 Type *RetType = randomType();
456
457 SmallVector<Type *, 2> Args;
458 for (uint64_t i = 0; i < ArgNum; i++) {
459 Args.push_back(Elt: randomType());
460 }
461
462 Function *F = Function::Create(Ty: FunctionType::get(Result: RetType, Params: Args,
463 /*isVarArg=*/false),
464 Linkage: GlobalValue::ExternalLinkage, N: "f", M: &M);
465 return F;
466}
467Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {
468 return createFunctionDeclaration(
469 M, ArgNum: uniform<uint64_t>(Gen&: Rand, Min: MinArgNum, Max: MaxArgNum));
470}
471
472Function *RandomIRBuilder::createFunctionDefinition(Module &M,
473 uint64_t ArgNum) {
474 Function *F = this->createFunctionDeclaration(M, ArgNum);
475
476 // TODO: Some arguments and a return value would probably be more
477 // interesting.
478 LLVMContext &Context = M.getContext();
479 const DataLayout &DL = M.getDataLayout();
480 BasicBlock *BB = BasicBlock::Create(Context, Name: "BB", Parent: F);
481 Type *RetTy = F->getReturnType();
482 if (RetTy != Type::getVoidTy(C&: Context)) {
483 Instruction *RetAlloca =
484 new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
485 Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
486 ReturnInst::Create(C&: Context, retVal: RetLoad, InsertBefore: BB);
487 } else {
488 ReturnInst::Create(C&: Context, InsertAtEnd: BB);
489 }
490
491 return F;
492}
493Function *RandomIRBuilder::createFunctionDefinition(Module &M) {
494 return createFunctionDefinition(
495 M, ArgNum: uniform<uint64_t>(Gen&: Rand, Min: MinArgNum, Max: MaxArgNum));
496}
497