1 | //===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // Spawn and orchestrate separate fuzzing processes. |
9 | //===----------------------------------------------------------------------===// |
10 | |
11 | #include "FuzzerCommand.h" |
12 | #include "FuzzerFork.h" |
13 | #include "FuzzerIO.h" |
14 | #include "FuzzerInternal.h" |
15 | #include "FuzzerMerge.h" |
16 | #include "FuzzerSHA1.h" |
17 | #include "FuzzerTracePC.h" |
18 | #include "FuzzerUtil.h" |
19 | |
20 | #include <atomic> |
21 | #include <chrono> |
22 | #include <condition_variable> |
23 | #include <fstream> |
24 | #include <memory> |
25 | #include <mutex> |
26 | #include <queue> |
27 | #include <sstream> |
28 | #include <thread> |
29 | |
30 | namespace fuzzer { |
31 | |
// Counters parsed from the "stat::" lines of a job's log file.
// Every field stays 0 when the corresponding line is absent.
struct Stats {
  size_t number_of_executed_units = 0;
  size_t peak_rss_mb = 0;
  size_t average_exec_per_sec = 0;
};

// Reads the log at LogPath and extracts the values of the lines
//   stat::number_of_executed_units: <N>
//   stat::peak_rss_mb: <N>
//   stat::average_exec_per_sec: <N>
// If a stat appears several times, the last well-formed occurrence wins.
// A missing/unreadable file yields an all-zero Stats.
//
// A "stat::" line whose value is missing or not a number is skipped instead
// of being stored: when the stream is already at EOF after the name,
// operator>> leaves the target untouched, so the previous code read an
// uninitialized variable in that case.
static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
  Stats Res;
  const struct {
    const char *Name;
    size_t *Var;
  } NameVarPairs[] = {
      {"stat::number_of_executed_units:", &Res.number_of_executed_units},
      {"stat::peak_rss_mb:", &Res.peak_rss_mb},
      {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
  };

  std::ifstream In(LogPath);
  std::string Line;
  while (std::getline(In, Line, '\n')) {
    // Only lines that *start* with "stat::" are of interest.
    if (Line.rfind("stat::", 0) != 0)
      continue;
    std::istringstream ISS(Line);
    std::string Name;
    size_t Val = 0;
    if (!(ISS >> Name >> Val))
      continue; // Truncated or malformed stat line.
    for (const auto &Pair : NameVarPairs)
      if (Name == Pair.Name)
        *Pair.Var = Val;
  }
  return Res;
}
63 | |
64 | struct FuzzJob { |
65 | // Inputs. |
66 | Command Cmd; |
67 | std::string CorpusDir; |
68 | std::string FeaturesDir; |
69 | std::string LogPath; |
70 | std::string SeedListPath; |
71 | std::string CFPath; |
72 | size_t JobId; |
73 | |
74 | int DftTimeInSeconds = 0; |
75 | |
76 | // Fuzzing Outputs. |
77 | int ExitCode; |
78 | |
79 | ~FuzzJob() { |
80 | RemoveFile(CFPath); |
81 | RemoveFile(LogPath); |
82 | RemoveFile(SeedListPath); |
83 | RmDirRecursive(CorpusDir); |
84 | RmDirRecursive(FeaturesDir); |
85 | } |
86 | }; |
87 | |
88 | struct GlobalEnv { |
89 | std::vector<std::string> Args; |
90 | std::vector<std::string> CorpusDirs; |
91 | std::string MainCorpusDir; |
92 | std::string TempDir; |
93 | std::string DFTDir; |
94 | std::string DataFlowBinary; |
95 | std::set<uint32_t> Features, Cov; |
96 | std::set<std::string> FilesWithDFT; |
97 | std::vector<std::string> Files; |
98 | std::vector<std::size_t> FilesSizes; |
99 | Random *Rand; |
100 | std::chrono::system_clock::time_point ProcessStartTime; |
101 | int Verbosity = 0; |
102 | int Group = 0; |
103 | int NumCorpuses = 8; |
104 | |
105 | size_t NumTimeouts = 0; |
106 | size_t NumOOMs = 0; |
107 | size_t NumCrashes = 0; |
108 | |
109 | |
110 | size_t NumRuns = 0; |
111 | |
112 | std::string StopFile() { return DirPlusFile(TempDir, "STOP" ); } |
113 | |
114 | size_t secondsSinceProcessStartUp() const { |
115 | return std::chrono::duration_cast<std::chrono::seconds>( |
116 | std::chrono::system_clock::now() - ProcessStartTime) |
117 | .count(); |
118 | } |
119 | |
120 | FuzzJob *CreateNewJob(size_t JobId) { |
121 | Command Cmd(Args); |
122 | Cmd.removeFlag("fork" ); |
123 | Cmd.removeFlag("runs" ); |
124 | Cmd.removeFlag("collect_data_flow" ); |
125 | for (auto &C : CorpusDirs) // Remove all corpora from the args. |
126 | Cmd.removeArgument(C); |
127 | Cmd.addFlag("reload" , "0" ); // working in an isolated dir, no reload. |
128 | Cmd.addFlag("print_final_stats" , "1" ); |
129 | Cmd.addFlag("print_funcs" , "0" ); // no need to spend time symbolizing. |
130 | Cmd.addFlag("max_total_time" , std::to_string(std::min((size_t)300, JobId))); |
131 | Cmd.addFlag("stop_file" , StopFile()); |
132 | if (!DataFlowBinary.empty()) { |
133 | Cmd.addFlag("data_flow_trace" , DFTDir); |
134 | if (!Cmd.hasFlag("focus_function" )) |
135 | Cmd.addFlag("focus_function" , "auto" ); |
136 | } |
137 | auto Job = new FuzzJob; |
138 | std::string Seeds; |
139 | if (size_t CorpusSubsetSize = |
140 | std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) { |
141 | auto Time1 = std::chrono::system_clock::now(); |
142 | if (Group) { // whether to group the corpus. |
143 | size_t AverageCorpusSize = Files.size() / NumCorpuses + 1; |
144 | size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize; |
145 | for (size_t i = 0; i < CorpusSubsetSize; i++) { |
146 | size_t RandNum = (*Rand)(AverageCorpusSize); |
147 | size_t Index = RandNum + StartIndex; |
148 | Index = Index < Files.size() ? Index |
149 | : Rand->SkewTowardsLast(Files.size()); |
150 | auto &SF = Files[Index]; |
151 | Seeds += (Seeds.empty() ? "" : "," ) + SF; |
152 | CollectDFT(SF); |
153 | } |
154 | } else { |
155 | for (size_t i = 0; i < CorpusSubsetSize; i++) { |
156 | auto &SF = Files[Rand->SkewTowardsLast(Files.size())]; |
157 | Seeds += (Seeds.empty() ? "" : "," ) + SF; |
158 | CollectDFT(SF); |
159 | } |
160 | } |
161 | auto Time2 = std::chrono::system_clock::now(); |
162 | auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count(); |
163 | assert(DftTimeInSeconds < std::numeric_limits<int>::max()); |
164 | Job->DftTimeInSeconds = static_cast<int>(DftTimeInSeconds); |
165 | } |
166 | if (!Seeds.empty()) { |
167 | Job->SeedListPath = |
168 | DirPlusFile(TempDir, std::to_string(JobId) + ".seeds" ); |
169 | WriteToFile(Seeds, Job->SeedListPath); |
170 | Cmd.addFlag("seed_inputs" , "@" + Job->SeedListPath); |
171 | } |
172 | Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log" ); |
173 | Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId)); |
174 | Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId)); |
175 | Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge" ); |
176 | Job->JobId = JobId; |
177 | |
178 | |
179 | Cmd.addArgument(Job->CorpusDir); |
180 | Cmd.addFlag("features_dir" , Job->FeaturesDir); |
181 | |
182 | for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) { |
183 | RmDirRecursive(D); |
184 | MkDir(D); |
185 | } |
186 | |
187 | Cmd.setOutputFile(Job->LogPath); |
188 | Cmd.combineOutAndErr(); |
189 | |
190 | Job->Cmd = Cmd; |
191 | |
192 | if (Verbosity >= 2) |
193 | Printf("Job %zd/%p Created: %s\n" , JobId, Job, |
194 | Job->Cmd.toString().c_str()); |
195 | // Start from very short runs and gradually increase them. |
196 | return Job; |
197 | } |
198 | |
199 | void RunOneMergeJob(FuzzJob *Job) { |
200 | auto Stats = ParseFinalStatsFromLog(Job->LogPath); |
201 | NumRuns += Stats.number_of_executed_units; |
202 | |
203 | std::vector<SizedFile> TempFiles, MergeCandidates; |
204 | // Read all newly created inputs and their feature sets. |
205 | // Choose only those inputs that have new features. |
206 | GetSizedFilesFromDir(Job->CorpusDir, &TempFiles); |
207 | std::sort(TempFiles.begin(), TempFiles.end()); |
208 | for (auto &F : TempFiles) { |
209 | auto FeatureFile = F.File; |
210 | FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir); |
211 | auto FeatureBytes = FileToVector(FeatureFile, 0, false); |
212 | assert((FeatureBytes.size() % sizeof(uint32_t)) == 0); |
213 | std::vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t)); |
214 | memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size()); |
215 | for (auto Ft : NewFeatures) { |
216 | if (!Features.count(Ft)) { |
217 | MergeCandidates.push_back(F); |
218 | break; |
219 | } |
220 | } |
221 | } |
222 | // if (!FilesToAdd.empty() || Job->ExitCode != 0) |
223 | Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s: %zd " |
224 | "oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n" , |
225 | NumRuns, Cov.size(), Features.size(), Files.size(), |
226 | Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes, |
227 | secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds); |
228 | |
229 | if (MergeCandidates.empty()) return; |
230 | |
231 | std::vector<std::string> FilesToAdd; |
232 | std::set<uint32_t> NewFeatures, NewCov; |
233 | bool IsSetCoverMerge = |
234 | !Job->Cmd.getFlagValue("set_cover_merge" ).compare("1" ); |
235 | CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features, |
236 | &NewFeatures, Cov, &NewCov, Job->CFPath, false, |
237 | IsSetCoverMerge); |
238 | for (auto &Path : FilesToAdd) { |
239 | auto U = FileToVector(Path); |
240 | auto NewPath = DirPlusFile(MainCorpusDir, Hash(U)); |
241 | WriteToFile(U, NewPath); |
242 | if (Group) { // Insert the queue according to the size of the seed. |
243 | size_t UnitSize = U.size(); |
244 | auto Idx = |
245 | std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) - |
246 | FilesSizes.begin(); |
247 | FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize); |
248 | Files.insert(Files.begin() + Idx, NewPath); |
249 | } else { |
250 | Files.push_back(NewPath); |
251 | } |
252 | } |
253 | Features.insert(NewFeatures.begin(), NewFeatures.end()); |
254 | Cov.insert(NewCov.begin(), NewCov.end()); |
255 | for (auto Idx : NewCov) |
256 | if (auto *TE = TPC.PCTableEntryByIdx(Idx)) |
257 | if (TPC.PcIsFuncEntry(TE)) |
258 | PrintPC(" NEW_FUNC: %p %F %L\n" , "" , |
259 | TPC.GetNextInstructionPc(TE->PC)); |
260 | } |
261 | |
262 | void CollectDFT(const std::string &InputPath) { |
263 | if (DataFlowBinary.empty()) return; |
264 | if (!FilesWithDFT.insert(InputPath).second) return; |
265 | Command Cmd(Args); |
266 | Cmd.removeFlag("fork" ); |
267 | Cmd.removeFlag("runs" ); |
268 | Cmd.addFlag("data_flow_trace" , DFTDir); |
269 | Cmd.addArgument(InputPath); |
270 | for (auto &C : CorpusDirs) // Remove all corpora from the args. |
271 | Cmd.removeArgument(C); |
272 | Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log" )); |
273 | Cmd.combineOutAndErr(); |
274 | // Printf("CollectDFT: %s\n", Cmd.toString().c_str()); |
275 | ExecuteCommand(Cmd); |
276 | } |
277 | |
278 | }; |
279 | |
280 | struct JobQueue { |
281 | std::queue<FuzzJob *> Qu; |
282 | std::mutex Mu; |
283 | std::condition_variable Cv; |
284 | |
285 | void Push(FuzzJob *Job) { |
286 | { |
287 | std::lock_guard<std::mutex> Lock(Mu); |
288 | Qu.push(Job); |
289 | } |
290 | Cv.notify_one(); |
291 | } |
292 | FuzzJob *Pop() { |
293 | std::unique_lock<std::mutex> Lk(Mu); |
294 | // std::lock_guard<std::mutex> Lock(Mu); |
295 | Cv.wait(Lk, [&]{return !Qu.empty();}); |
296 | assert(!Qu.empty()); |
297 | auto Job = Qu.front(); |
298 | Qu.pop(); |
299 | return Job; |
300 | } |
301 | }; |
302 | |
303 | void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) { |
304 | while (auto Job = FuzzQ->Pop()) { |
305 | // Printf("WorkerThread: job %p\n", Job); |
306 | Job->ExitCode = ExecuteCommand(Job->Cmd); |
307 | MergeQ->Push(Job); |
308 | } |
309 | } |
310 | |
311 | // This is just a skeleton of an experimental -fork=1 feature. |
312 | void FuzzWithFork(Random &Rand, const FuzzingOptions &Options, |
313 | const std::vector<std::string> &Args, |
314 | const std::vector<std::string> &CorpusDirs, int NumJobs) { |
315 | Printf("INFO: -fork=%d: fuzzing in separate process(s)\n" , NumJobs); |
316 | |
317 | GlobalEnv Env; |
318 | Env.Args = Args; |
319 | Env.CorpusDirs = CorpusDirs; |
320 | Env.Rand = &Rand; |
321 | Env.Verbosity = Options.Verbosity; |
322 | Env.ProcessStartTime = std::chrono::system_clock::now(); |
323 | Env.DataFlowBinary = Options.CollectDataFlow; |
324 | Env.Group = Options.ForkCorpusGroups; |
325 | |
326 | std::vector<SizedFile> SeedFiles; |
327 | for (auto &Dir : CorpusDirs) |
328 | GetSizedFilesFromDir(Dir, &SeedFiles); |
329 | std::sort(SeedFiles.begin(), SeedFiles.end()); |
330 | Env.TempDir = TempPath("FuzzWithFork" , ".dir" ); |
331 | Env.DFTDir = DirPlusFile(Env.TempDir, "DFT" ); |
332 | RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs. |
333 | MkDir(Env.TempDir); |
334 | MkDir(Env.DFTDir); |
335 | |
336 | |
337 | if (CorpusDirs.empty()) |
338 | MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C" )); |
339 | else |
340 | Env.MainCorpusDir = CorpusDirs[0]; |
341 | |
342 | if (Options.KeepSeed) { |
343 | for (auto &File : SeedFiles) |
344 | Env.Files.push_back(File.File); |
345 | } else { |
346 | auto CFPath = DirPlusFile(Env.TempDir, "merge.txt" ); |
347 | std::set<uint32_t> NewFeatures, NewCov; |
348 | CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features, |
349 | &NewFeatures, Env.Cov, &NewCov, CFPath, |
350 | /*Verbose=*/false, /*IsSetCoverMerge=*/false); |
351 | Env.Features.insert(NewFeatures.begin(), NewFeatures.end()); |
352 | Env.Cov.insert(NewCov.begin(), NewCov.end()); |
353 | RemoveFile(CFPath); |
354 | } |
355 | |
356 | if (Env.Group) { |
357 | for (auto &path : Env.Files) |
358 | Env.FilesSizes.push_back(FileSize(path)); |
359 | } |
360 | |
361 | Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n" , NumJobs, |
362 | Env.Files.size(), Env.TempDir.c_str()); |
363 | |
364 | int ExitCode = 0; |
365 | |
366 | JobQueue FuzzQ, MergeQ; |
367 | |
368 | auto StopJobs = [&]() { |
369 | for (int i = 0; i < NumJobs; i++) |
370 | FuzzQ.Push(nullptr); |
371 | MergeQ.Push(nullptr); |
372 | WriteToFile(Unit({1}), Env.StopFile()); |
373 | }; |
374 | |
375 | size_t MergeCycle = 20; |
376 | size_t JobExecuted = 0; |
377 | size_t JobId = 1; |
378 | std::vector<std::thread> Threads; |
379 | for (int t = 0; t < NumJobs; t++) { |
380 | Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ)); |
381 | FuzzQ.Push(Env.CreateNewJob(JobId++)); |
382 | } |
383 | |
384 | while (true) { |
385 | std::unique_ptr<FuzzJob> Job(MergeQ.Pop()); |
386 | if (!Job) |
387 | break; |
388 | ExitCode = Job->ExitCode; |
389 | if (ExitCode == Options.InterruptExitCode) { |
390 | Printf("==%lu== libFuzzer: a child was interrupted; exiting\n" , GetPid()); |
391 | StopJobs(); |
392 | break; |
393 | } |
394 | Fuzzer::MaybeExitGracefully(); |
395 | |
396 | Env.RunOneMergeJob(Job.get()); |
397 | |
398 | // merge the corpus . |
399 | JobExecuted++; |
400 | if (Env.Group && JobExecuted >= MergeCycle) { |
401 | std::vector<SizedFile> CurrentSeedFiles; |
402 | for (auto &Dir : CorpusDirs) |
403 | GetSizedFilesFromDir(Dir, &CurrentSeedFiles); |
404 | std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end()); |
405 | |
406 | auto CFPath = DirPlusFile(Env.TempDir, "merge.txt" ); |
407 | std::set<uint32_t> TmpNewFeatures, TmpNewCov; |
408 | std::set<uint32_t> TmpFeatures, TmpCov; |
409 | Env.Files.clear(); |
410 | Env.FilesSizes.clear(); |
411 | CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files, |
412 | TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov, |
413 | CFPath, /*Verbose=*/false, /*IsSetCoverMerge=*/false); |
414 | for (auto &path : Env.Files) |
415 | Env.FilesSizes.push_back(FileSize(path)); |
416 | RemoveFile(CFPath); |
417 | JobExecuted = 0; |
418 | MergeCycle += 5; |
419 | } |
420 | |
421 | // Since the number of corpus seeds will gradually increase, in order to |
422 | // control the number in each group to be about three times the number of |
423 | // seeds selected each time, the number of groups is dynamically adjusted. |
424 | if (Env.Files.size() < 2000) |
425 | Env.NumCorpuses = 12; |
426 | else if (Env.Files.size() < 6000) |
427 | Env.NumCorpuses = 20; |
428 | else if (Env.Files.size() < 12000) |
429 | Env.NumCorpuses = 32; |
430 | else if (Env.Files.size() < 16000) |
431 | Env.NumCorpuses = 40; |
432 | else if (Env.Files.size() < 24000) |
433 | Env.NumCorpuses = 60; |
434 | else |
435 | Env.NumCorpuses = 80; |
436 | |
437 | // Continue if our crash is one of the ignored ones. |
438 | if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode) |
439 | Env.NumTimeouts++; |
440 | else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode) |
441 | Env.NumOOMs++; |
442 | else if (ExitCode != 0) { |
443 | Env.NumCrashes++; |
444 | if (Options.IgnoreCrashes) { |
445 | std::ifstream In(Job->LogPath); |
446 | std::string Line; |
447 | while (std::getline(In, Line, '\n')) |
448 | if (Line.find("ERROR:" ) != Line.npos || |
449 | Line.find("runtime error:" ) != Line.npos) |
450 | Printf("%s\n" , Line.c_str()); |
451 | } else { |
452 | // And exit if we don't ignore this crash. |
453 | Printf("INFO: log from the inner process:\n%s" , |
454 | FileToString(Job->LogPath).c_str()); |
455 | StopJobs(); |
456 | break; |
457 | } |
458 | } |
459 | |
460 | // Stop if we are over the time budget. |
461 | // This is not precise, since other threads are still running |
462 | // and we will wait while joining them. |
463 | // We also don't stop instantly: other jobs need to finish. |
464 | if (Options.MaxTotalTimeSec > 0 && |
465 | Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) { |
466 | Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n" , |
467 | Env.secondsSinceProcessStartUp()); |
468 | StopJobs(); |
469 | break; |
470 | } |
471 | if (Env.NumRuns >= Options.MaxNumberOfRuns) { |
472 | Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n" , |
473 | Env.NumRuns); |
474 | StopJobs(); |
475 | break; |
476 | } |
477 | |
478 | FuzzQ.Push(Env.CreateNewJob(JobId++)); |
479 | } |
480 | |
481 | for (auto &T : Threads) |
482 | T.join(); |
483 | |
484 | // The workers have terminated. Don't try to remove the directory before they |
485 | // terminate to avoid a race condition preventing cleanup on Windows. |
486 | RmDirRecursive(Env.TempDir); |
487 | |
488 | // Use the exit code from the last child process. |
489 | Printf("INFO: exiting: %d time: %zds\n" , ExitCode, |
490 | Env.secondsSinceProcessStartUp()); |
491 | exit(ExitCode); |
492 | } |
493 | |
494 | } // namespace fuzzer |
495 | |