// FuzzerFork.cpp source code [llvm_runtimes/compiler-rt/lib/fuzzer/FuzzerFork.cpp]

1	//===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	// Spawn and orchestrate separate fuzzing processes.
9	//===----------------------------------------------------------------------===//
10
11	#include "FuzzerCommand.h"
12	#include "FuzzerFork.h"
13	#include "FuzzerIO.h"
14	#include "FuzzerInternal.h"
15	#include "FuzzerMerge.h"
16	#include "FuzzerSHA1.h"
17	#include "FuzzerTracePC.h"
18	#include "FuzzerUtil.h"
19
20	#include <atomic>
21	#include <chrono>
22	#include <condition_variable>
23	#include <fstream>
24	#include <memory>
25	#include <mutex>
26	#include <queue>
27	#include <sstream>
28	#include <thread>
29
30	namespace fuzzer {
31
/// Final counters reported by one child fuzzing process.
/// Each field mirrors the "stat::<field>:" line of the same name in the
/// child's log; fields that never appear in the log remain zero.
struct Stats {
  size_t number_of_executed_units{0};
  size_t peak_rss_mb{0};
  size_t average_exec_per_sec{0};
};
37
38	static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
39	std::ifstream In(LogPath);
40	std::string Line;
41	Stats Res;
42	struct {
43	const char *Name;
44	size_t *Var;
45	} NameVarPairs[] = {
46	{"stat::number_of_executed_units:", &Res.number_of_executed_units},
47	{"stat::peak_rss_mb:", &Res.peak_rss_mb},
48	{"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
49	{nullptr, nullptr},
50	};
51	while (std::getline(In, Line, `'\n'`)) {
52	if (Line.find("stat::") != `0`) continue;
53	std::istringstream ISS(Line);
54	std::string Name;
55	size_t Val;
56	ISS >> Name >> Val;
57	for (size_t i = `0`; NameVarPairs[i].Name; i++)
58	if (Name == NameVarPairs[i].Name)
59	*NameVarPairs[i].Var = Val;
60	}
61	return Res;
62	}
63
64	struct FuzzJob {
65	// Inputs.
66	Command Cmd;
67	std::string CorpusDir;
68	std::string FeaturesDir;
69	std::string LogPath;
70	std::string SeedListPath;
71	std::string CFPath;
72	size_t JobId;
73
74	int DftTimeInSeconds = `0`;
75
76	// Fuzzing Outputs.
77	int ExitCode;
78
79	~FuzzJob() {
80	RemoveFile(CFPath);
81	RemoveFile(LogPath);
82	RemoveFile(SeedListPath);
83	RmDirRecursive(CorpusDir);
84	RmDirRecursive(FeaturesDir);
85	}
86	};
87
88	struct GlobalEnv {
89	std::vector<std::string> Args;
90	std::vector<std::string> CorpusDirs;
91	std::string MainCorpusDir;
92	std::string TempDir;
93	std::string DFTDir;
94	std::string DataFlowBinary;
95	std::set<uint32_t> Features, Cov;
96	std::set<std::string> FilesWithDFT;
97	std::vector<std::string> Files;
98	std::vector<std::size_t> FilesSizes;
99	Random *Rand;
100	std::chrono::system_clock::time_point ProcessStartTime;
101	int Verbosity = `0`;
102	int Group = `0`;
103	int NumCorpuses = `8`;
104
105	size_t NumTimeouts = `0`;
106	size_t NumOOMs = `0`;
107	size_t NumCrashes = `0`;
108
109
110	size_t NumRuns = `0`;
111
112	std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }
113
114	size_t secondsSinceProcessStartUp() const {
115	return std::chrono::duration_cast<std::chrono::seconds>(
116	std::chrono::system_clock::now() - ProcessStartTime)
117	.count();
118	}
119
120	FuzzJob *CreateNewJob(size_t JobId) {
121	Command Cmd(Args);
122	Cmd.removeFlag("fork");
123	Cmd.removeFlag("runs");
124	Cmd.removeFlag("collect_data_flow");
125	for (auto &C : CorpusDirs) // Remove all corpora from the args.
126	Cmd.removeArgument(C);
127	Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload.
128	Cmd.addFlag("print_final_stats", "1");
129	Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing.
130	Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)`300`, JobId)));
131	Cmd.addFlag("stop_file", StopFile());
132	if (!DataFlowBinary.empty()) {
133	Cmd.addFlag("data_flow_trace", DFTDir);
134	if (!Cmd.hasFlag("focus_function"))
135	Cmd.addFlag("focus_function", "auto");
136	}
137	auto Job = new FuzzJob;
138	std::string Seeds;
139	if (size_t CorpusSubsetSize =
140	std::min(Files.size(), (size_t)sqrt(Files.size() + `2`))) {
141	auto Time1 = std::chrono::system_clock::now();
142	if (Group) { // whether to group the corpus.
143	size_t AverageCorpusSize = Files.size() / NumCorpuses + `1`;
144	size_t StartIndex = ((JobId - `1`) % NumCorpuses) * AverageCorpusSize;
145	for (size_t i = `0`; i < CorpusSubsetSize; i++) {
146	size_t RandNum = (*Rand)(AverageCorpusSize);
147	size_t Index = RandNum + StartIndex;
148	Index = Index < Files.size() ? Index
149	: Rand->SkewTowardsLast(Files.size());
150	auto &SF = Files[Index];
151	Seeds += (Seeds.empty() ? "" : ",") + SF;
152	CollectDFT(SF);
153	}
154	} else {
155	for (size_t i = `0`; i < CorpusSubsetSize; i++) {
156	auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
157	Seeds += (Seeds.empty() ? "" : ",") + SF;
158	CollectDFT(SF);
159	}
160	}
161	auto Time2 = std::chrono::system_clock::now();
162	auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
163	assert(DftTimeInSeconds < std::numeric_limits<int>::max());
164	Job->DftTimeInSeconds = static_cast<int>(DftTimeInSeconds);
165	}
166	if (!Seeds.empty()) {
167	Job->SeedListPath =
168	DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
169	WriteToFile(Seeds, Job->SeedListPath);
170	Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
171	}
172	Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
173	Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
174	Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
175	Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
176	Job->JobId = JobId;
177
178
179	Cmd.addArgument(Job->CorpusDir);
180	Cmd.addFlag("features_dir", Job->FeaturesDir);
181
182	for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
183	RmDirRecursive(D);
184	MkDir(D);
185	}
186
187	Cmd.setOutputFile(Job->LogPath);
188	Cmd.combineOutAndErr();
189
190	Job->Cmd = Cmd;
191
192	if (Verbosity >= `2`)
193	Printf("Job %zd/%p Created: %s\n", JobId, Job,
194	Job->Cmd.toString().c_str());
195	// Start from very short runs and gradually increase them.
196	return Job;
197	}
198
199	void RunOneMergeJob(FuzzJob *Job) {
200	auto Stats = ParseFinalStatsFromLog(Job->LogPath);
201	NumRuns += Stats.number_of_executed_units;
202
203	std::vector<SizedFile> TempFiles, MergeCandidates;
204	// Read all newly created inputs and their feature sets.
205	// Choose only those inputs that have new features.
206	GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
207	std::sort(TempFiles.begin(), TempFiles.end());
208	for (auto &F : TempFiles) {
209	auto FeatureFile = F.File;
210	FeatureFile.replace(`0`, Job->CorpusDir.size(), Job->FeaturesDir);
211	auto FeatureBytes = FileToVector(FeatureFile, `0`, false);
212	assert((FeatureBytes.size() % sizeof(uint32_t)) == `0`);
213	std::vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
214	memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
215	for (auto Ft : NewFeatures) {
216	if (!Features.count(Ft)) {
217	MergeCandidates.push_back(F);
218	break;
219	}
220	}
221	}
222	// if (!FilesToAdd.empty() \|\| Job->ExitCode != 0)
223	Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s: %zd "
224	"oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
225	NumRuns, Cov.size(), Features.size(), Files.size(),
226	Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,
227	secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);
228
229	if (MergeCandidates.empty()) return;
230
231	std::vector<std::string> FilesToAdd;
232	std::set<uint32_t> NewFeatures, NewCov;
233	bool IsSetCoverMerge =
234	!Job->Cmd.getFlagValue("set_cover_merge").compare("1");
235	CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
236	&NewFeatures, Cov, &NewCov, Job->CFPath, false,
237	IsSetCoverMerge);
238	for (auto &Path : FilesToAdd) {
239	auto U = FileToVector(Path);
240	auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
241	WriteToFile(U, NewPath);
242	if (Group) { // Insert the queue according to the size of the seed.
243	size_t UnitSize = U.size();
244	auto Idx =
245	std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
246	FilesSizes.begin();
247	FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
248	Files.insert(Files.begin() + Idx, NewPath);
249	} else {
250	Files.push_back(NewPath);
251	}
252	}
253	Features.insert(NewFeatures.begin(), NewFeatures.end());
254	Cov.insert(NewCov.begin(), NewCov.end());
255	for (auto Idx : NewCov)
256	if (auto *TE = TPC.PCTableEntryByIdx(Idx))
257	if (TPC.PcIsFuncEntry(TE))
258	PrintPC(" NEW_FUNC: %p %F %L\n", "",
259	TPC.GetNextInstructionPc(TE->PC));
260	}
261
262	void CollectDFT(const std::string &InputPath) {
263	if (DataFlowBinary.empty()) return;
264	if (!FilesWithDFT.insert(InputPath).second) return;
265	Command Cmd(Args);
266	Cmd.removeFlag("fork");
267	Cmd.removeFlag("runs");
268	Cmd.addFlag("data_flow_trace", DFTDir);
269	Cmd.addArgument(InputPath);
270	for (auto &C : CorpusDirs) // Remove all corpora from the args.
271	Cmd.removeArgument(C);
272	Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
273	Cmd.combineOutAndErr();
274	// Printf("CollectDFT: %s\n", Cmd.toString().c_str());
275	ExecuteCommand(Cmd);
276	}
277
278	};
279
struct FuzzJob; // Only pointers are stored below; an incomplete type suffices.

/// Mutex-protected blocking FIFO of job pointers.
/// Entries may be null; the queue itself attaches no meaning to that.
struct JobQueue {
  std::queue<FuzzJob *> Qu;
  std::mutex Mu;
  std::condition_variable Cv;

  /// Appends \p Job to the back of the queue and wakes one waiting Pop().
  void Push(FuzzJob *Job) {
    std::unique_lock<std::mutex> Guard(Mu);
    Qu.push(Job);
    // Release the lock before notifying so the woken thread can take it
    // immediately.
    Guard.unlock();
    Cv.notify_one();
  }

  /// Blocks until the queue has an entry, then removes and returns the
  /// oldest one.
  FuzzJob *Pop() {
    std::unique_lock<std::mutex> Guard(Mu);
    while (Qu.empty())
      Cv.wait(Guard);
    assert(!Qu.empty());
    FuzzJob *Oldest = Qu.front();
    Qu.pop();
    return Oldest;
  }
};
302
303	void WorkerThread(JobQueue FuzzQ, JobQueue MergeQ) {
304	while (auto Job = FuzzQ->Pop()) {
305	// Printf("WorkerThread: job %p\n", Job);
306	Job->ExitCode = ExecuteCommand(Job->Cmd);
307	MergeQ->Push(Job);
308	}
309	}
310
311	// This is just a skeleton of an experimental -fork=1 feature.
312	void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
313	const std::vector<std::string> &Args,
314	const std::vector<std::string> &CorpusDirs, int NumJobs) {
315	Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);
316
317	GlobalEnv Env;
318	Env.Args = Args;
319	Env.CorpusDirs = CorpusDirs;
320	Env.Rand = &Rand;
321	Env.Verbosity = Options.Verbosity;
322	Env.ProcessStartTime = std::chrono::system_clock::now();
323	Env.DataFlowBinary = Options.CollectDataFlow;
324	Env.Group = Options.ForkCorpusGroups;
325
326	std::vector<SizedFile> SeedFiles;
327	for (auto &Dir : CorpusDirs)
328	GetSizedFilesFromDir(Dir, &SeedFiles);
329	std::sort(SeedFiles.begin(), SeedFiles.end());
330	Env.TempDir = TempPath("FuzzWithFork", ".dir");
331	Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
332	RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs.
333	MkDir(Env.TempDir);
334	MkDir(Env.DFTDir);
335
336
337	if (CorpusDirs.empty())
338	MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
339	else
340	Env.MainCorpusDir = CorpusDirs[`0`];
341
342	if (Options.KeepSeed) {
343	for (auto &File : SeedFiles)
344	Env.Files.push_back(File.File);
345	} else {
346	auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
347	std::set<uint32_t> NewFeatures, NewCov;
348	CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features,
349	&NewFeatures, Env.Cov, &NewCov, CFPath,
350	/Verbose=/false, /IsSetCoverMerge=/false);
351	Env.Features.insert(NewFeatures.begin(), NewFeatures.end());
352	Env.Cov.insert(NewCov.begin(), NewCov.end());
353	RemoveFile(CFPath);
354	}
355
356	if (Env.Group) {
357	for (auto &path : Env.Files)
358	Env.FilesSizes.push_back(FileSize(path));
359	}
360
361	Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
362	Env.Files.size(), Env.TempDir.c_str());
363
364	int ExitCode = `0`;
365
366	JobQueue FuzzQ, MergeQ;
367
368	auto StopJobs = [&]() {
369	for (int i = `0`; i < NumJobs; i++)
370	FuzzQ.Push(nullptr);
371	MergeQ.Push(nullptr);
372	WriteToFile(Unit({`1`}), Env.StopFile());
373	};
374
375	size_t MergeCycle = `20`;
376	size_t JobExecuted = `0`;
377	size_t JobId = `1`;
378	std::vector<std::thread> Threads;
379	for (int t = `0`; t < NumJobs; t++) {
380	Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
381	FuzzQ.Push(Env.CreateNewJob(JobId++));
382	}
383
384	while (true) {
385	std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
386	if (!Job)
387	break;
388	ExitCode = Job->ExitCode;
389	if (ExitCode == Options.InterruptExitCode) {
390	Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
391	StopJobs();
392	break;
393	}
394	Fuzzer::MaybeExitGracefully();
395
396	Env.RunOneMergeJob(Job.get());
397
398	// merge the corpus .
399	JobExecuted++;
400	if (Env.Group && JobExecuted >= MergeCycle) {
401	std::vector<SizedFile> CurrentSeedFiles;
402	for (auto &Dir : CorpusDirs)
403	GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
404	std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());
405
406	auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
407	std::set<uint32_t> TmpNewFeatures, TmpNewCov;
408	std::set<uint32_t> TmpFeatures, TmpCov;
409	Env.Files.clear();
410	Env.FilesSizes.clear();
411	CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
412	TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
413	CFPath, /Verbose=/false, /IsSetCoverMerge=/false);
414	for (auto &path : Env.Files)
415	Env.FilesSizes.push_back(FileSize(path));
416	RemoveFile(CFPath);
417	JobExecuted = `0`;
418	MergeCycle += `5`;
419	}
420
421	// Since the number of corpus seeds will gradually increase, in order to
422	// control the number in each group to be about three times the number of
423	// seeds selected each time, the number of groups is dynamically adjusted.
424	if (Env.Files.size() < `2000`)
425	Env.NumCorpuses = `12`;
426	else if (Env.Files.size() < `6000`)
427	Env.NumCorpuses = `20`;
428	else if (Env.Files.size() < `12000`)
429	Env.NumCorpuses = `32`;
430	else if (Env.Files.size() < `16000`)
431	Env.NumCorpuses = `40`;
432	else if (Env.Files.size() < `24000`)
433	Env.NumCorpuses = `60`;
434	else
435	Env.NumCorpuses = `80`;
436
437	// Continue if our crash is one of the ignored ones.
438	if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
439	Env.NumTimeouts++;
440	else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
441	Env.NumOOMs++;
442	else if (ExitCode != `0`) {
443	Env.NumCrashes++;
444	if (Options.IgnoreCrashes) {
445	std::ifstream In(Job->LogPath);
446	std::string Line;
447	while (std::getline(In, Line, `'\n'`))
448	if (Line.find("ERROR:") != Line.npos \|\|
449	Line.find("runtime error:") != Line.npos)
450	Printf("%s\n", Line.c_str());
451	} else {
452	// And exit if we don't ignore this crash.
453	Printf("INFO: log from the inner process:\n%s",
454	FileToString(Job->LogPath).c_str());
455	StopJobs();
456	break;
457	}
458	}
459
460	// Stop if we are over the time budget.
461	// This is not precise, since other threads are still running
462	// and we will wait while joining them.
463	// We also don't stop instantly: other jobs need to finish.
464	if (Options.MaxTotalTimeSec > `0` &&
465	Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
466	Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
467	Env.secondsSinceProcessStartUp());
468	StopJobs();
469	break;
470	}
471	if (Env.NumRuns >= Options.MaxNumberOfRuns) {
472	Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
473	Env.NumRuns);
474	StopJobs();
475	break;
476	}
477
478	FuzzQ.Push(Env.CreateNewJob(JobId++));
479	}
480
481	for (auto &T : Threads)
482	T.join();
483
484	// The workers have terminated. Don't try to remove the directory before they
485	// terminate to avoid a race condition preventing cleanup on Windows.
486	RmDirRecursive(Env.TempDir);
487
488	// Use the exit code from the last child process.
489	Printf("INFO: exiting: %d time: %zds\n", ExitCode,
490	Env.secondsSinceProcessStartUp());
491	exit(ExitCode);
492	}
493
494	} // namespace fuzzer
495

// Browse the source code of llvm_runtimes/compiler-rt/lib/fuzzer/FuzzerFork.cpp