1//===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// Spawn and orchestrate separate fuzzing processes.
9//===----------------------------------------------------------------------===//
10
11#include "FuzzerCommand.h"
12#include "FuzzerFork.h"
13#include "FuzzerIO.h"
14#include "FuzzerInternal.h"
15#include "FuzzerMerge.h"
16#include "FuzzerSHA1.h"
17#include "FuzzerTracePC.h"
18#include "FuzzerUtil.h"
19
20#include <atomic>
21#include <chrono>
22#include <condition_variable>
23#include <fstream>
24#include <memory>
25#include <mutex>
26#include <queue>
27#include <sstream>
28#include <thread>
29
30namespace fuzzer {
31
// Final statistics a child libFuzzer process prints at exit (it is spawned
// with -print_final_stats=1) and that the parent scrapes from the child's log.
struct Stats {
  size_t number_of_executed_units = 0; // Total inputs the child executed.
  size_t peak_rss_mb = 0;              // Peak resident memory, in MB.
  size_t average_exec_per_sec = 0;     // Average executions per second.
};

// Scans the log file at `LogPath` for "stat::<name>: <value>" lines and
// returns the parsed statistics. Entries that are absent or malformed keep
// their zero defaults.
static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
  std::ifstream In(LogPath);
  std::string Line;
  Stats Res;
  // Null-terminated table mapping each stat name to its destination field.
  struct {
    const char *Name;
    size_t *Var;
  } NameVarPairs[] = {
      {"stat::number_of_executed_units:", &Res.number_of_executed_units},
      {"stat::peak_rss_mb:", &Res.peak_rss_mb},
      {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
      {nullptr, nullptr},
  };
  while (std::getline(In, Line, '\n')) {
    if (Line.find("stat::") != 0) continue;
    std::istringstream ISS(Line);
    std::string Name;
    size_t Val = 0;
    // Skip lines whose numeric value is missing or unparsable; previously an
    // indeterminate (uninitialized) Val could be stored for such lines.
    if (!(ISS >> Name >> Val))
      continue;
    for (size_t i = 0; NameVarPairs[i].Name; i++)
      if (Name == NameVarPairs[i].Name)
        *NameVarPairs[i].Var = Val;
  }
  return Res;
}
63
// One unit of work for a worker thread: the child command to execute plus
// the temporary paths holding that child's inputs and outputs. The
// destructor removes all of them.
struct FuzzJob {
  // Inputs.
  Command Cmd;                // Fully-built child command line.
  std::string CorpusDir;      // Isolated directory the child writes new inputs to.
  std::string FeaturesDir;    // Directory of per-input feature dumps (-features_dir).
  std::string LogPath;        // Combined stdout/stderr of the child.
  std::string SeedListPath;   // File listing this job's comma-separated seeds.
  std::string CFPath;         // Control file used by the post-run merge step.
  size_t JobId;

  int DftTimeInSeconds = 0;   // Time spent collecting data-flow traces for seeds.

  // Fuzzing Outputs.
  int ExitCode;               // Exit code of the child process.

  // Clean up every temporary file/directory this job owns.
  ~FuzzJob() {
    RemoveFile(CFPath);
    RemoveFile(LogPath);
    RemoveFile(SeedListPath);
    RmDirRecursive(CorpusDir);
    RmDirRecursive(FeaturesDir);
  }
};
87
// Session-wide state for fork mode: the command-line template, the merged
// corpus, accumulated feature/coverage sets, and job factory/merge helpers.
// NOTE(review): appears to be mutated only from the orchestration loop in
// FuzzWithFork (worker threads only execute commands) — confirm before
// accessing it from additional threads.
struct GlobalEnv {
  std::vector<std::string> Args;       // Original args; template for child commands.
  std::vector<std::string> CorpusDirs; // User-supplied corpus directories.
  std::string MainCorpusDir;           // Where newly discovered inputs are stored.
  std::string TempDir;                 // Scratch dir holding all per-job files.
  std::string DFTDir;                  // Destination for data-flow traces.
  std::string DataFlowBinary;          // Non-empty => data-flow-trace collection on.
  std::set<uint32_t> Features, Cov;    // All features / coverage IDs seen so far.
  std::set<std::string> FilesWithDFT;  // Inputs whose DFT was already collected.
  std::vector<std::string> Files;      // Current corpus file paths.
  std::vector<std::size_t> FilesSizes; // Sizes parallel to Files (group mode only).
  Random *Rand;
  std::chrono::system_clock::time_point ProcessStartTime;
  int Verbosity = 0;
  int Group = 0;       // Non-zero: pick seeds from size-ordered corpus groups.
  int NumCorpuses = 8; // Group count; adjusted dynamically by FuzzWithFork.

  size_t NumTimeouts = 0;
  size_t NumOOMs = 0;
  size_t NumCrashes = 0;


  size_t NumRuns = 0;  // Total units executed across all finished jobs.

  // Sentinel file: its existence tells child processes to stop.
  std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }

  // Whole seconds elapsed since ProcessStartTime was recorded.
  size_t secondsSinceProcessStartUp() const {
    return std::chrono::duration_cast<std::chrono::seconds>(
               std::chrono::system_clock::now() - ProcessStartTime)
        .count();
  }

  // Builds the next child job: derives the child command from Args, picks a
  // random corpus subset as seeds (collecting DFTs for them if enabled), and
  // allocates the job's temporary files/dirs under TempDir.
  FuzzJob *CreateNewJob(size_t JobId) {
    Command Cmd(Args);
    // These flags must not propagate into the child process.
    Cmd.removeFlag("fork");
    Cmd.removeFlag("runs");
    Cmd.removeFlag("collect_data_flow");
    for (auto &C : CorpusDirs) // Remove all corpora from the args.
      Cmd.removeArgument(C);
    Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload.
    Cmd.addFlag("print_final_stats", "1");
    Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing.
    // Each child runs for min(300, JobId) seconds: early jobs are short and
    // later jobs gradually lengthen up to the 300s cap.
    Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
    Cmd.addFlag("stop_file", StopFile());
    if (!DataFlowBinary.empty()) {
      Cmd.addFlag("data_flow_trace", DFTDir);
      if (!Cmd.hasFlag("focus_function"))
        Cmd.addFlag("focus_function", "auto");
    }
    auto Job = new FuzzJob;
    std::string Seeds;
    // Seed the child with roughly sqrt(|corpus|) inputs (zero iff empty).
    if (size_t CorpusSubsetSize =
            std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
      auto Time1 = std::chrono::system_clock::now();
      if (Group) { // whether to group the corpus.
        // Draw seeds from the group this JobId maps to; indices past the end
        // of Files fall back to a draw skewed towards the newest inputs.
        size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;
        size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
        for (size_t i = 0; i < CorpusSubsetSize; i++) {
          size_t RandNum = (*Rand)(AverageCorpusSize);
          size_t Index = RandNum + StartIndex;
          Index = Index < Files.size() ? Index
                                       : Rand->SkewTowardsLast(Files.size());
          auto &SF = Files[Index];
          Seeds += (Seeds.empty() ? "" : ",") + SF;
          CollectDFT(SF);
        }
      } else {
        // No grouping: sample the whole corpus, skewed towards recent files.
        for (size_t i = 0; i < CorpusSubsetSize; i++) {
          auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
          Seeds += (Seeds.empty() ? "" : ",") + SF;
          CollectDFT(SF);
        }
      }
      auto Time2 = std::chrono::system_clock::now();
      // Record how long DFT collection took so it can be reported per job.
      auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
      assert(DftTimeInSeconds < std::numeric_limits<int>::max());
      Job->DftTimeInSeconds = static_cast<int>(DftTimeInSeconds);
    }
    if (!Seeds.empty()) {
      Job->SeedListPath =
          DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
      WriteToFile(Seeds, Job->SeedListPath);
      // "@file" tells the child to read the seed list from that file.
      Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
    }
    Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
    Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
    Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
    Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
    Job->JobId = JobId;


    Cmd.addArgument(Job->CorpusDir);
    Cmd.addFlag("features_dir", Job->FeaturesDir);

    // Give the child fresh, empty corpus/features directories.
    for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
      RmDirRecursive(D);
      MkDir(D);
    }

    // Capture the child's combined stdout/stderr in the job's log file.
    Cmd.setOutputFile(Job->LogPath);
    Cmd.combineOutAndErr();

    Job->Cmd = Cmd;

    if (Verbosity >= 2)
      Printf("Job %zd/%p Created: %s\n", JobId, Job,
             Job->Cmd.toString().c_str());
    // Start from very short runs and gradually increase them.
    return Job;
  }

  // Post-processes a finished job: accounts its stats, keeps only outputs
  // carrying new features, merges those into the main corpus/Files, and
  // prints any newly covered function entry points.
  void RunOneMergeJob(FuzzJob *Job) {
    auto Stats = ParseFinalStatsFromLog(Job->LogPath);
    NumRuns += Stats.number_of_executed_units;

    std::vector<SizedFile> TempFiles, MergeCandidates;
    // Read all newly created inputs and their feature sets.
    // Choose only those inputs that have new features.
    GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
    std::sort(TempFiles.begin(), TempFiles.end());
    for (auto &F : TempFiles) {
      // The feature dump for an input lives at the same relative path under
      // FeaturesDir; the file is a raw array of uint32_t feature IDs.
      auto FeatureFile = F.File;
      FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
      auto FeatureBytes = FileToVector(FeatureFile, 0, false);
      assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
      std::vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
      memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
      for (auto Ft : NewFeatures) {
        if (!Features.count(Ft)) {
          MergeCandidates.push_back(F);
          break;
        }
      }
    }
    // if (!FilesToAdd.empty() || Job->ExitCode != 0)
    Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s: %zd "
           "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
           NumRuns, Cov.size(), Features.size(), Files.size(),
           Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,
           secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);

    if (MergeCandidates.empty()) return;

    std::vector<std::string> FilesToAdd;
    std::set<uint32_t> NewFeatures, NewCov;
    // Mirror the child's set_cover_merge setting when merging its outputs.
    bool IsSetCoverMerge =
        !Job->Cmd.getFlagValue("set_cover_merge").compare("1");
    CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
                        &NewFeatures, Cov, &NewCov, Job->CFPath, false,
                        IsSetCoverMerge);
    for (auto &Path : FilesToAdd) {
      // Persist each accepted input into the main corpus under its hash.
      auto U = FileToVector(Path);
      auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
      WriteToFile(U, NewPath);
      if (Group) { // Insert the queue according to the size of the seed.
        size_t UnitSize = U.size();
        auto Idx =
            std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
            FilesSizes.begin();
        FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
        Files.insert(Files.begin() + Idx, NewPath);
      } else {
        Files.push_back(NewPath);
      }
    }
    Features.insert(NewFeatures.begin(), NewFeatures.end());
    Cov.insert(NewCov.begin(), NewCov.end());
    // Report functions whose entry PC is covered for the first time.
    for (auto Idx : NewCov)
      if (auto *TE = TPC.PCTableEntryByIdx(Idx))
        if (TPC.PcIsFuncEntry(TE))
          PrintPC(" NEW_FUNC: %p %F %L\n", "",
                  TPC.GetNextInstructionPc(TE->PC));
  }

  // Executes one child run with -data_flow_trace to record a trace for
  // InputPath into DFTDir. No-op unless DFT collection is enabled; each
  // input is traced at most once (tracked via FilesWithDFT).
  void CollectDFT(const std::string &InputPath) {
    if (DataFlowBinary.empty()) return;
    if (!FilesWithDFT.insert(InputPath).second) return;
    Command Cmd(Args);
    Cmd.removeFlag("fork");
    Cmd.removeFlag("runs");
    Cmd.addFlag("data_flow_trace", DFTDir);
    Cmd.addArgument(InputPath);
    for (auto &C : CorpusDirs) // Remove all corpora from the args.
      Cmd.removeArgument(C);
    Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
    Cmd.combineOutAndErr();
    // Printf("CollectDFT: %s\n", Cmd.toString().c_str());
    ExecuteCommand(Cmd);
  }

};
279
280struct JobQueue {
281 std::queue<FuzzJob *> Qu;
282 std::mutex Mu;
283 std::condition_variable Cv;
284
285 void Push(FuzzJob *Job) {
286 {
287 std::lock_guard<std::mutex> Lock(Mu);
288 Qu.push(Job);
289 }
290 Cv.notify_one();
291 }
292 FuzzJob *Pop() {
293 std::unique_lock<std::mutex> Lk(Mu);
294 // std::lock_guard<std::mutex> Lock(Mu);
295 Cv.wait(Lk, [&]{return !Qu.empty();});
296 assert(!Qu.empty());
297 auto Job = Qu.front();
298 Qu.pop();
299 return Job;
300 }
301};
302
303void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {
304 while (auto Job = FuzzQ->Pop()) {
305 // Printf("WorkerThread: job %p\n", Job);
306 Job->ExitCode = ExecuteCommand(Job->Cmd);
307 MergeQ->Push(Job);
308 }
309}
310
// This is just a skeleton of an experimental -fork=1 feature.
// Orchestrates fork-mode fuzzing: seeds the environment from CorpusDirs,
// spins up NumJobs worker threads that each run child libFuzzer processes,
// and merges every finished job's findings back into one shared corpus.
// Exits the process with the last child's exit code.
void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
                  const std::vector<std::string> &Args,
                  const std::vector<std::string> &CorpusDirs, int NumJobs) {
  Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);

  GlobalEnv Env;
  Env.Args = Args;
  Env.CorpusDirs = CorpusDirs;
  Env.Rand = &Rand;
  Env.Verbosity = Options.Verbosity;
  Env.ProcessStartTime = std::chrono::system_clock::now();
  Env.DataFlowBinary = Options.CollectDataFlow;
  Env.Group = Options.ForkCorpusGroups;

  // Gather the initial seed files from all user-supplied corpus dirs.
  std::vector<SizedFile> SeedFiles;
  for (auto &Dir : CorpusDirs)
    GetSizedFilesFromDir(Dir, &SeedFiles);
  std::sort(SeedFiles.begin(), SeedFiles.end());
  Env.TempDir = TempPath("FuzzWithFork", ".dir");
  Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
  RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs.
  MkDir(Env.TempDir);
  MkDir(Env.DFTDir);


  // New inputs go to the first user corpus dir, or to a temp dir if none.
  if (CorpusDirs.empty())
    MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
  else
    Env.MainCorpusDir = CorpusDirs[0];

  if (Options.KeepSeed) {
    // Keep every seed as-is, without an initial de-duplicating merge.
    for (auto &File : SeedFiles)
      Env.Files.push_back(File.File);
  } else {
    // Initial merge: populate Env.Files/Features/Cov from the seeds.
    auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
    std::set<uint32_t> NewFeatures, NewCov;
    CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features,
                        &NewFeatures, Env.Cov, &NewCov, CFPath,
                        /*Verbose=*/false, /*IsSetCoverMerge=*/false);
    Env.Features.insert(NewFeatures.begin(), NewFeatures.end());
    Env.Cov.insert(NewCov.begin(), NewCov.end());
    RemoveFile(CFPath);
  }

  // Group mode keeps a size list parallel to Env.Files for sorted insertion.
  if (Env.Group) {
    for (auto &path : Env.Files)
      Env.FilesSizes.push_back(FileSize(path));
  }

  Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
         Env.Files.size(), Env.TempDir.c_str());

  int ExitCode = 0;

  JobQueue FuzzQ, MergeQ;

  // Shut everything down: one null job per worker, a null for this merge
  // loop, and the STOP file that tells running children to wind down.
  auto StopJobs = [&]() {
    for (int i = 0; i < NumJobs; i++)
      FuzzQ.Push(nullptr);
    MergeQ.Push(nullptr);
    WriteToFile(Unit({1}), Env.StopFile());
  };

  size_t MergeCycle = 20;  // Jobs between full corpus re-merges (group mode).
  size_t JobExecuted = 0;  // Jobs finished since the last re-merge.
  size_t JobId = 1;
  std::vector<std::thread> Threads;
  // Start the workers and give each an initial job.
  for (int t = 0; t < NumJobs; t++) {
    Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
    FuzzQ.Push(Env.CreateNewJob(JobId++));
  }

  // Main orchestration loop: consume finished jobs, merge their results,
  // and keep the workers fed until a stop condition fires.
  while (true) {
    std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
    if (!Job)
      break;
    ExitCode = Job->ExitCode;
    if (ExitCode == Options.InterruptExitCode) {
      Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
      StopJobs();
      break;
    }
    Fuzzer::MaybeExitGracefully();

    Env.RunOneMergeJob(Job.get());

    // merge the corpus .
    JobExecuted++;
    // In group mode, periodically rebuild Env.Files from the on-disk corpus
    // with a fresh merge (empty feature/cov baselines), then lengthen the
    // interval until the next rebuild.
    if (Env.Group && JobExecuted >= MergeCycle) {
      std::vector<SizedFile> CurrentSeedFiles;
      for (auto &Dir : CorpusDirs)
        GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
      std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());

      auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
      std::set<uint32_t> TmpNewFeatures, TmpNewCov;
      std::set<uint32_t> TmpFeatures, TmpCov;
      Env.Files.clear();
      Env.FilesSizes.clear();
      CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
                          TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
                          CFPath, /*Verbose=*/false, /*IsSetCoverMerge=*/false);
      for (auto &path : Env.Files)
        Env.FilesSizes.push_back(FileSize(path));
      RemoveFile(CFPath);
      JobExecuted = 0;
      MergeCycle += 5;
    }

    // Since the number of corpus seeds will gradually increase, in order to
    // control the number in each group to be about three times the number of
    // seeds selected each time, the number of groups is dynamically adjusted.
    if (Env.Files.size() < 2000)
      Env.NumCorpuses = 12;
    else if (Env.Files.size() < 6000)
      Env.NumCorpuses = 20;
    else if (Env.Files.size() < 12000)
      Env.NumCorpuses = 32;
    else if (Env.Files.size() < 16000)
      Env.NumCorpuses = 40;
    else if (Env.Files.size() < 24000)
      Env.NumCorpuses = 60;
    else
      Env.NumCorpuses = 80;

    // Continue if our crash is one of the ignored ones.
    if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
      Env.NumTimeouts++;
    else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
      Env.NumOOMs++;
    else if (ExitCode != 0) {
      Env.NumCrashes++;
      if (Options.IgnoreCrashes) {
        // Ignored crash: surface only the error lines from the child's log.
        std::ifstream In(Job->LogPath);
        std::string Line;
        while (std::getline(In, Line, '\n'))
          if (Line.find("ERROR:") != Line.npos ||
              Line.find("runtime error:") != Line.npos)
            Printf("%s\n", Line.c_str());
      } else {
        // And exit if we don't ignore this crash.
        Printf("INFO: log from the inner process:\n%s",
               FileToString(Job->LogPath).c_str());
        StopJobs();
        break;
      }
    }

    // Stop if we are over the time budget.
    // This is not precise, since other threads are still running
    // and we will wait while joining them.
    // We also don't stop instantly: other jobs need to finish.
    if (Options.MaxTotalTimeSec > 0 &&
        Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
      Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
             Env.secondsSinceProcessStartUp());
      StopJobs();
      break;
    }
    if (Env.NumRuns >= Options.MaxNumberOfRuns) {
      Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
             Env.NumRuns);
      StopJobs();
      break;
    }

    // Replace the consumed job so the workers stay busy.
    FuzzQ.Push(Env.CreateNewJob(JobId++));
  }

  for (auto &T : Threads)
    T.join();

  // The workers have terminated. Don't try to remove the directory before they
  // terminate to avoid a race condition preventing cleanup on Windows.
  RmDirRecursive(Env.TempDir);

  // Use the exit code from the last child process.
  Printf("INFO: exiting: %d time: %zds\n", ExitCode,
         Env.secondsSinceProcessStartUp());
  exit(ExitCode);
}
493
494} // namespace fuzzer
495