| 1 | //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // fuzzer::InputCorpus |
| 9 | //===----------------------------------------------------------------------===// |
| 10 | |
| 11 | #ifndef LLVM_FUZZER_CORPUS |
| 12 | #define LLVM_FUZZER_CORPUS |
| 13 | |
| 14 | #include "FuzzerDataFlowTrace.h" |
| 15 | #include "FuzzerDefs.h" |
| 16 | #include "FuzzerIO.h" |
| 17 | #include "FuzzerRandom.h" |
| 18 | #include "FuzzerSHA1.h" |
| 19 | #include "FuzzerTracePC.h" |
| 20 | #include <algorithm> |
| 21 | #include <bitset> |
| 22 | #include <chrono> |
| 23 | #include <numeric> |
| 24 | #include <random> |
| 25 | #include <unordered_set> |
| 26 | |
| 27 | namespace fuzzer { |
| 28 | |
| 29 | struct InputInfo { |
| 30 | Unit U; // The actual input data. |
| 31 | std::chrono::microseconds TimeOfUnit; |
| 32 | uint8_t Sha1[kSHA1NumBytes]; // Checksum. |
| 33 | // Number of features that this input has and no smaller input has. |
| 34 | size_t NumFeatures = 0; |
| 35 | size_t Tmp = 0; // Used by ValidateFeatureSet. |
| 36 | // Stats. |
| 37 | size_t NumExecutedMutations = 0; |
| 38 | size_t NumSuccessfullMutations = 0; |
| 39 | bool NeverReduce = false; |
| 40 | bool MayDeleteFile = false; |
| 41 | bool Reduced = false; |
| 42 | bool HasFocusFunction = false; |
| 43 | std::vector<uint32_t> UniqFeatureSet; |
| 44 | std::vector<uint8_t> DataFlowTraceForFocusFunction; |
| 45 | // Power schedule. |
| 46 | bool NeedsEnergyUpdate = false; |
| 47 | double Energy = 0.0; |
| 48 | double SumIncidence = 0.0; |
| 49 | std::vector<std::pair<uint32_t, uint16_t>> FeatureFreqs; |
| 50 | |
| 51 | // Delete feature Idx and its frequency from FeatureFreqs. |
| 52 | bool DeleteFeatureFreq(uint32_t Idx) { |
| 53 | if (FeatureFreqs.empty()) |
| 54 | return false; |
| 55 | |
| 56 | // Binary search over local feature frequencies sorted by index. |
| 57 | auto Lower = std::lower_bound(first: FeatureFreqs.begin(), last: FeatureFreqs.end(), |
| 58 | value: std::pair<uint32_t, uint16_t>(Idx, 0)); |
| 59 | |
| 60 | if (Lower != FeatureFreqs.end() && Lower->first == Idx) { |
| 61 | FeatureFreqs.erase(position: Lower); |
| 62 | return true; |
| 63 | } |
| 64 | return false; |
| 65 | } |
| 66 | |
| 67 | // Assign more energy to a high-entropy seed, i.e., that reveals more |
| 68 | // information about the globally rare features in the neighborhood of the |
| 69 | // seed. Since we do not know the entropy of a seed that has never been |
| 70 | // executed we assign fresh seeds maximum entropy and let II->Energy approach |
| 71 | // the true entropy from above. If ScalePerExecTime is true, the computed |
| 72 | // entropy is scaled based on how fast this input executes compared to the |
| 73 | // average execution time of inputs. The faster an input executes, the more |
| 74 | // energy gets assigned to the input. |
| 75 | void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime, |
| 76 | std::chrono::microseconds AverageUnitExecutionTime) { |
| 77 | Energy = 0.0; |
| 78 | SumIncidence = 0.0; |
| 79 | |
| 80 | // Apply add-one smoothing to locally discovered features. |
| 81 | for (const auto &F : FeatureFreqs) { |
| 82 | double LocalIncidence = F.second + 1; |
| 83 | Energy -= LocalIncidence * log(x: LocalIncidence); |
| 84 | SumIncidence += LocalIncidence; |
| 85 | } |
| 86 | |
| 87 | // Apply add-one smoothing to locally undiscovered features. |
| 88 | // PreciseEnergy -= 0; // since log(1.0) == 0) |
| 89 | SumIncidence += |
| 90 | static_cast<double>(GlobalNumberOfFeatures - FeatureFreqs.size()); |
| 91 | |
| 92 | // Add a single locally abundant feature apply add-one smoothing. |
| 93 | double AbdIncidence = static_cast<double>(NumExecutedMutations + 1); |
| 94 | Energy -= AbdIncidence * log(x: AbdIncidence); |
| 95 | SumIncidence += AbdIncidence; |
| 96 | |
| 97 | // Normalize. |
| 98 | if (SumIncidence != 0) |
| 99 | Energy = Energy / SumIncidence + log(x: SumIncidence); |
| 100 | |
| 101 | if (ScalePerExecTime) { |
| 102 | // Scaling to favor inputs with lower execution time. |
| 103 | uint32_t PerfScore = 100; |
| 104 | if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10) |
| 105 | PerfScore = 10; |
| 106 | else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4) |
| 107 | PerfScore = 25; |
| 108 | else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2) |
| 109 | PerfScore = 50; |
| 110 | else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4) |
| 111 | PerfScore = 75; |
| 112 | else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count()) |
| 113 | PerfScore = 300; |
| 114 | else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count()) |
| 115 | PerfScore = 200; |
| 116 | else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count()) |
| 117 | PerfScore = 150; |
| 118 | |
| 119 | Energy *= PerfScore; |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | // Increment the frequency of the feature Idx. |
| 124 | void UpdateFeatureFrequency(uint32_t Idx) { |
| 125 | NeedsEnergyUpdate = true; |
| 126 | |
| 127 | // The local feature frequencies is an ordered vector of pairs. |
| 128 | // If there are no local feature frequencies, push_back preserves order. |
| 129 | // Set the feature frequency for feature Idx32 to 1. |
| 130 | if (FeatureFreqs.empty()) { |
| 131 | FeatureFreqs.push_back(x: std::pair<uint32_t, uint16_t>(Idx, 1)); |
| 132 | return; |
| 133 | } |
| 134 | |
| 135 | // Binary search over local feature frequencies sorted by index. |
| 136 | auto Lower = std::lower_bound(first: FeatureFreqs.begin(), last: FeatureFreqs.end(), |
| 137 | value: std::pair<uint32_t, uint16_t>(Idx, 0)); |
| 138 | |
| 139 | // If feature Idx32 already exists, increment its frequency. |
| 140 | // Otherwise, insert a new pair right after the next lower index. |
| 141 | if (Lower != FeatureFreqs.end() && Lower->first == Idx) { |
| 142 | Lower->second++; |
| 143 | } else { |
| 144 | FeatureFreqs.insert(position: Lower, x: std::pair<uint32_t, uint16_t>(Idx, 1)); |
| 145 | } |
| 146 | } |
| 147 | }; |
| 148 | |
| 149 | struct EntropicOptions { |
| 150 | bool Enabled; |
| 151 | size_t NumberOfRarestFeatures; |
| 152 | size_t FeatureFrequencyThreshold; |
| 153 | bool ScalePerExecTime; |
| 154 | }; |
| 155 | |
| 156 | class InputCorpus { |
| 157 | static const uint32_t kFeatureSetSize = 1 << 21; |
| 158 | static const uint8_t kMaxMutationFactor = 20; |
| 159 | static const size_t kSparseEnergyUpdates = 100; |
| 160 | |
| 161 | size_t NumExecutedMutations = 0; |
| 162 | |
| 163 | EntropicOptions Entropic; |
| 164 | |
| 165 | public: |
| 166 | InputCorpus(const std::string &OutputCorpus, EntropicOptions Entropic) |
| 167 | : Entropic(Entropic), OutputCorpus(OutputCorpus) { |
| 168 | memset(s: InputSizesPerFeature, c: 0, n: sizeof(InputSizesPerFeature)); |
| 169 | memset(s: SmallestElementPerFeature, c: 0, n: sizeof(SmallestElementPerFeature)); |
| 170 | } |
| 171 | ~InputCorpus() { |
| 172 | for (auto II : Inputs) |
| 173 | delete II; |
| 174 | } |
| 175 | size_t size() const { return Inputs.size(); } |
| 176 | size_t SizeInBytes() const { |
| 177 | size_t Res = 0; |
| 178 | for (auto II : Inputs) |
| 179 | Res += II->U.size(); |
| 180 | return Res; |
| 181 | } |
| 182 | size_t NumActiveUnits() const { |
| 183 | size_t Res = 0; |
| 184 | for (auto II : Inputs) |
| 185 | Res += !II->U.empty(); |
| 186 | return Res; |
| 187 | } |
| 188 | size_t MaxInputSize() const { |
| 189 | size_t Res = 0; |
| 190 | for (auto II : Inputs) |
| 191 | Res = std::max(a: Res, b: II->U.size()); |
| 192 | return Res; |
| 193 | } |
| 194 | void IncrementNumExecutedMutations() { NumExecutedMutations++; } |
| 195 | |
| 196 | size_t NumInputsThatTouchFocusFunction() { |
| 197 | return std::count_if(first: Inputs.begin(), last: Inputs.end(), pred: [](const InputInfo *II) { |
| 198 | return II->HasFocusFunction; |
| 199 | }); |
| 200 | } |
| 201 | |
| 202 | size_t NumInputsWithDataFlowTrace() { |
| 203 | return std::count_if(first: Inputs.begin(), last: Inputs.end(), pred: [](const InputInfo *II) { |
| 204 | return !II->DataFlowTraceForFocusFunction.empty(); |
| 205 | }); |
| 206 | } |
| 207 | |
| 208 | bool empty() const { return Inputs.empty(); } |
| 209 | const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } |
| 210 | InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, |
| 211 | bool HasFocusFunction, bool NeverReduce, |
| 212 | std::chrono::microseconds TimeOfUnit, |
| 213 | const std::vector<uint32_t> &FeatureSet, |
| 214 | const DataFlowTrace &DFT, const InputInfo *BaseII) { |
| 215 | assert(!U.empty()); |
| 216 | if (FeatureDebug) |
| 217 | Printf(Fmt: "ADD_TO_CORPUS %zd NF %zd\n" , Inputs.size(), NumFeatures); |
| 218 | // Inputs.size() is cast to uint32_t below. |
| 219 | assert(Inputs.size() < std::numeric_limits<uint32_t>::max()); |
| 220 | Inputs.push_back(x: new InputInfo()); |
| 221 | InputInfo &II = *Inputs.back(); |
| 222 | II.U = U; |
| 223 | II.NumFeatures = NumFeatures; |
| 224 | II.NeverReduce = NeverReduce; |
| 225 | II.TimeOfUnit = TimeOfUnit; |
| 226 | II.MayDeleteFile = MayDeleteFile; |
| 227 | II.UniqFeatureSet = FeatureSet; |
| 228 | II.HasFocusFunction = HasFocusFunction; |
| 229 | // Assign maximal energy to the new seed. |
| 230 | II.Energy = RareFeatures.empty() ? 1.0 : log(x: RareFeatures.size()); |
| 231 | II.SumIncidence = static_cast<double>(RareFeatures.size()); |
| 232 | II.NeedsEnergyUpdate = false; |
| 233 | std::sort(first: II.UniqFeatureSet.begin(), last: II.UniqFeatureSet.end()); |
| 234 | ComputeSHA1(Data: U.data(), Len: U.size(), Out: II.Sha1); |
| 235 | auto Sha1Str = Sha1ToString(Sha1: II.Sha1); |
| 236 | Hashes.insert(x: Sha1Str); |
| 237 | if (HasFocusFunction) |
| 238 | if (auto V = DFT.Get(InputSha1: Sha1Str)) |
| 239 | II.DataFlowTraceForFocusFunction = *V; |
| 240 | // This is a gross heuristic. |
| 241 | // Ideally, when we add an element to a corpus we need to know its DFT. |
| 242 | // But if we don't, we'll use the DFT of its base input. |
| 243 | if (II.DataFlowTraceForFocusFunction.empty() && BaseII) |
| 244 | II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction; |
| 245 | DistributionNeedsUpdate = true; |
| 246 | PrintCorpus(); |
| 247 | // ValidateFeatureSet(); |
| 248 | return &II; |
| 249 | } |
| 250 | |
| 251 | // Debug-only |
| 252 | void PrintUnit(const Unit &U) { |
| 253 | if (!FeatureDebug) return; |
| 254 | for (uint8_t C : U) { |
| 255 | if (C != 'F' && C != 'U' && C != 'Z') |
| 256 | C = '.'; |
| 257 | Printf(Fmt: "%c" , C); |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | // Debug-only |
| 262 | void PrintFeatureSet(const std::vector<uint32_t> &FeatureSet) { |
| 263 | if (!FeatureDebug) return; |
| 264 | Printf(Fmt: "{" ); |
| 265 | for (uint32_t Feature: FeatureSet) |
| 266 | Printf(Fmt: "%u," , Feature); |
| 267 | Printf(Fmt: "}" ); |
| 268 | } |
| 269 | |
| 270 | // Debug-only |
| 271 | void PrintCorpus() { |
| 272 | if (!FeatureDebug) return; |
| 273 | Printf(Fmt: "======= CORPUS:\n" ); |
| 274 | int i = 0; |
| 275 | for (auto II : Inputs) { |
| 276 | if (std::find(first: II->U.begin(), last: II->U.end(), value: 'F') != II->U.end()) { |
| 277 | Printf(Fmt: "[%2d] " , i); |
| 278 | Printf(Fmt: "%s sz=%zd " , Sha1ToString(Sha1: II->Sha1).c_str(), II->U.size()); |
| 279 | PrintUnit(U: II->U); |
| 280 | Printf(Fmt: " " ); |
| 281 | PrintFeatureSet(FeatureSet: II->UniqFeatureSet); |
| 282 | Printf(Fmt: "\n" ); |
| 283 | } |
| 284 | i++; |
| 285 | } |
| 286 | } |
| 287 | |
| 288 | void Replace(InputInfo *II, const Unit &U, |
| 289 | std::chrono::microseconds TimeOfUnit) { |
| 290 | assert(II->U.size() > U.size()); |
| 291 | Hashes.erase(k: Sha1ToString(Sha1: II->Sha1)); |
| 292 | DeleteFile(II: *II); |
| 293 | ComputeSHA1(Data: U.data(), Len: U.size(), Out: II->Sha1); |
| 294 | Hashes.insert(x: Sha1ToString(Sha1: II->Sha1)); |
| 295 | II->U = U; |
| 296 | II->Reduced = true; |
| 297 | II->TimeOfUnit = TimeOfUnit; |
| 298 | DistributionNeedsUpdate = true; |
| 299 | } |
| 300 | |
| 301 | bool HasUnit(const Unit &U) { return Hashes.count(k: Hash(U)); } |
| 302 | bool HasUnit(const std::string &H) { return Hashes.count(k: H); } |
| 303 | InputInfo &ChooseUnitToMutate(Random &Rand) { |
| 304 | InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; |
| 305 | assert(!II.U.empty()); |
| 306 | return II; |
| 307 | } |
| 308 | |
| 309 | InputInfo &ChooseUnitToCrossOverWith(Random &Rand, bool UniformDist) { |
| 310 | if (!UniformDist) { |
| 311 | return ChooseUnitToMutate(Rand); |
| 312 | } |
| 313 | InputInfo &II = *Inputs[Rand(Inputs.size())]; |
| 314 | assert(!II.U.empty()); |
| 315 | return II; |
| 316 | } |
| 317 | |
| 318 | // Returns an index of random unit from the corpus to mutate. |
| 319 | size_t ChooseUnitIdxToMutate(Random &Rand) { |
| 320 | UpdateCorpusDistribution(Rand); |
| 321 | size_t Idx = static_cast<size_t>(CorpusDistribution(Rand)); |
| 322 | assert(Idx < Inputs.size()); |
| 323 | return Idx; |
| 324 | } |
| 325 | |
| 326 | void PrintStats() { |
| 327 | for (size_t i = 0; i < Inputs.size(); i++) { |
| 328 | const auto &II = *Inputs[i]; |
| 329 | Printf(Fmt: " [% 3zd %s] sz: % 5zd runs: % 5zd succ: % 5zd focus: %d\n" , i, |
| 330 | Sha1ToString(Sha1: II.Sha1).c_str(), II.U.size(), |
| 331 | II.NumExecutedMutations, II.NumSuccessfullMutations, |
| 332 | II.HasFocusFunction); |
| 333 | } |
| 334 | } |
| 335 | |
| 336 | void PrintFeatureSet() { |
| 337 | for (size_t i = 0; i < kFeatureSetSize; i++) { |
| 338 | if(size_t Sz = GetFeature(Idx: i)) |
| 339 | Printf(Fmt: "[%zd: id %zd sz%zd] " , i, SmallestElementPerFeature[i], Sz); |
| 340 | } |
| 341 | Printf(Fmt: "\n\t" ); |
| 342 | for (size_t i = 0; i < Inputs.size(); i++) |
| 343 | if (size_t N = Inputs[i]->NumFeatures) |
| 344 | Printf(Fmt: " %zd=>%zd " , i, N); |
| 345 | Printf(Fmt: "\n" ); |
| 346 | } |
| 347 | |
| 348 | void DeleteFile(const InputInfo &II) { |
| 349 | if (!OutputCorpus.empty() && II.MayDeleteFile) |
| 350 | RemoveFile(Path: DirPlusFile(DirPath: OutputCorpus, FileName: Sha1ToString(Sha1: II.Sha1))); |
| 351 | } |
| 352 | |
| 353 | void DeleteInput(size_t Idx) { |
| 354 | InputInfo &II = *Inputs[Idx]; |
| 355 | DeleteFile(II); |
| 356 | Unit().swap(x&: II.U); |
| 357 | II.Energy = 0.0; |
| 358 | II.NeedsEnergyUpdate = false; |
| 359 | DistributionNeedsUpdate = true; |
| 360 | if (FeatureDebug) |
| 361 | Printf(Fmt: "EVICTED %zd\n" , Idx); |
| 362 | } |
| 363 | |
| 364 | void AddRareFeature(uint32_t Idx) { |
| 365 | // Maintain *at least* TopXRarestFeatures many rare features |
| 366 | // and all features with a frequency below ConsideredRare. |
| 367 | // Remove all other features. |
| 368 | while (RareFeatures.size() > Entropic.NumberOfRarestFeatures && |
| 369 | FreqOfMostAbundantRareFeature > Entropic.FeatureFrequencyThreshold) { |
| 370 | |
| 371 | // Find most and second most abbundant feature. |
| 372 | uint32_t MostAbundantRareFeatureIndices[2] = {RareFeatures[0], |
| 373 | RareFeatures[0]}; |
| 374 | size_t Delete = 0; |
| 375 | for (size_t i = 0; i < RareFeatures.size(); i++) { |
| 376 | uint32_t Idx2 = RareFeatures[i]; |
| 377 | if (GlobalFeatureFreqs[Idx2] >= |
| 378 | GlobalFeatureFreqs[MostAbundantRareFeatureIndices[0]]) { |
| 379 | MostAbundantRareFeatureIndices[1] = MostAbundantRareFeatureIndices[0]; |
| 380 | MostAbundantRareFeatureIndices[0] = Idx2; |
| 381 | Delete = i; |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | // Remove most abundant rare feature. |
| 386 | IsRareFeature[Delete] = false; |
| 387 | RareFeatures[Delete] = RareFeatures.back(); |
| 388 | RareFeatures.pop_back(); |
| 389 | |
| 390 | for (auto II : Inputs) { |
| 391 | if (II->DeleteFeatureFreq(Idx: MostAbundantRareFeatureIndices[0])) |
| 392 | II->NeedsEnergyUpdate = true; |
| 393 | } |
| 394 | |
| 395 | // Set 2nd most abundant as the new most abundant feature count. |
| 396 | FreqOfMostAbundantRareFeature = |
| 397 | GlobalFeatureFreqs[MostAbundantRareFeatureIndices[1]]; |
| 398 | } |
| 399 | |
| 400 | // Add rare feature, handle collisions, and update energy. |
| 401 | RareFeatures.push_back(x: Idx); |
| 402 | IsRareFeature[Idx] = true; |
| 403 | GlobalFeatureFreqs[Idx] = 0; |
| 404 | for (auto II : Inputs) { |
| 405 | II->DeleteFeatureFreq(Idx); |
| 406 | |
| 407 | // Apply add-one smoothing to this locally undiscovered feature. |
| 408 | // Zero energy seeds will never be fuzzed and remain zero energy. |
| 409 | if (II->Energy > 0.0) { |
| 410 | II->SumIncidence += 1; |
| 411 | II->Energy += log(x: II->SumIncidence) / II->SumIncidence; |
| 412 | } |
| 413 | } |
| 414 | |
| 415 | DistributionNeedsUpdate = true; |
| 416 | } |
| 417 | |
| 418 | bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { |
| 419 | assert(NewSize); |
| 420 | Idx = Idx % kFeatureSetSize; |
| 421 | uint32_t OldSize = GetFeature(Idx); |
| 422 | if (OldSize == 0 || (Shrink && OldSize > NewSize)) { |
| 423 | if (OldSize > 0) { |
| 424 | size_t OldIdx = SmallestElementPerFeature[Idx]; |
| 425 | InputInfo &II = *Inputs[OldIdx]; |
| 426 | assert(II.NumFeatures > 0); |
| 427 | II.NumFeatures--; |
| 428 | if (II.NumFeatures == 0) |
| 429 | DeleteInput(Idx: OldIdx); |
| 430 | } else { |
| 431 | NumAddedFeatures++; |
| 432 | if (Entropic.Enabled) |
| 433 | AddRareFeature(Idx: (uint32_t)Idx); |
| 434 | } |
| 435 | NumUpdatedFeatures++; |
| 436 | if (FeatureDebug) |
| 437 | Printf(Fmt: "ADD FEATURE %zd sz %d\n" , Idx, NewSize); |
| 438 | // Inputs.size() is guaranteed to be less than UINT32_MAX by AddToCorpus. |
| 439 | SmallestElementPerFeature[Idx] = static_cast<uint32_t>(Inputs.size()); |
| 440 | InputSizesPerFeature[Idx] = NewSize; |
| 441 | return true; |
| 442 | } |
| 443 | return false; |
| 444 | } |
| 445 | |
| 446 | // Increment frequency of feature Idx globally and locally. |
| 447 | void UpdateFeatureFrequency(InputInfo *II, size_t Idx) { |
| 448 | uint32_t Idx32 = Idx % kFeatureSetSize; |
| 449 | |
| 450 | // Saturated increment. |
| 451 | if (GlobalFeatureFreqs[Idx32] == 0xFFFF) |
| 452 | return; |
| 453 | uint16_t Freq = GlobalFeatureFreqs[Idx32]++; |
| 454 | |
| 455 | // Skip if abundant. |
| 456 | if (Freq > FreqOfMostAbundantRareFeature || !IsRareFeature[Idx32]) |
| 457 | return; |
| 458 | |
| 459 | // Update global frequencies. |
| 460 | if (Freq == FreqOfMostAbundantRareFeature) |
| 461 | FreqOfMostAbundantRareFeature++; |
| 462 | |
| 463 | // Update local frequencies. |
| 464 | if (II) |
| 465 | II->UpdateFeatureFrequency(Idx: Idx32); |
| 466 | } |
| 467 | |
| 468 | size_t NumFeatures() const { return NumAddedFeatures; } |
| 469 | size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } |
| 470 | |
| 471 | private: |
| 472 | |
| 473 | static const bool FeatureDebug = false; |
| 474 | |
| 475 | uint32_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } |
| 476 | |
| 477 | void ValidateFeatureSet() { |
| 478 | if (FeatureDebug) |
| 479 | PrintFeatureSet(); |
| 480 | for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) |
| 481 | if (GetFeature(Idx)) |
| 482 | Inputs[SmallestElementPerFeature[Idx]]->Tmp++; |
| 483 | for (auto II: Inputs) { |
| 484 | if (II->Tmp != II->NumFeatures) |
| 485 | Printf(Fmt: "ZZZ %zd %zd\n" , II->Tmp, II->NumFeatures); |
| 486 | assert(II->Tmp == II->NumFeatures); |
| 487 | II->Tmp = 0; |
| 488 | } |
| 489 | } |
| 490 | |
| 491 | // Updates the probability distribution for the units in the corpus. |
| 492 | // Must be called whenever the corpus or unit weights are changed. |
| 493 | // |
| 494 | // Hypothesis: inputs that maximize information about globally rare features |
| 495 | // are interesting. |
| 496 | void UpdateCorpusDistribution(Random &Rand) { |
| 497 | // Skip update if no seeds or rare features were added/deleted. |
| 498 | // Sparse updates for local change of feature frequencies, |
| 499 | // i.e., randomly do not skip. |
| 500 | if (!DistributionNeedsUpdate && |
| 501 | (!Entropic.Enabled || Rand(kSparseEnergyUpdates))) |
| 502 | return; |
| 503 | |
| 504 | DistributionNeedsUpdate = false; |
| 505 | |
| 506 | size_t N = Inputs.size(); |
| 507 | assert(N); |
| 508 | Intervals.resize(sz: N + 1); |
| 509 | Weights.resize(sz: N); |
| 510 | std::iota(first: Intervals.begin(), last: Intervals.end(), value: 0); |
| 511 | |
| 512 | std::chrono::microseconds AverageUnitExecutionTime(0); |
| 513 | for (auto II : Inputs) { |
| 514 | AverageUnitExecutionTime += II->TimeOfUnit; |
| 515 | } |
| 516 | AverageUnitExecutionTime /= N; |
| 517 | |
| 518 | bool VanillaSchedule = true; |
| 519 | if (Entropic.Enabled) { |
| 520 | for (auto II : Inputs) { |
| 521 | if (II->NeedsEnergyUpdate && II->Energy != 0.0) { |
| 522 | II->NeedsEnergyUpdate = false; |
| 523 | II->UpdateEnergy(GlobalNumberOfFeatures: RareFeatures.size(), ScalePerExecTime: Entropic.ScalePerExecTime, |
| 524 | AverageUnitExecutionTime); |
| 525 | } |
| 526 | } |
| 527 | |
| 528 | for (size_t i = 0; i < N; i++) { |
| 529 | |
| 530 | if (Inputs[i]->NumFeatures == 0) { |
| 531 | // If the seed doesn't represent any features, assign zero energy. |
| 532 | Weights[i] = 0.; |
| 533 | } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor > |
| 534 | NumExecutedMutations / Inputs.size()) { |
| 535 | // If the seed was fuzzed a lot more than average, assign zero energy. |
| 536 | Weights[i] = 0.; |
| 537 | } else { |
| 538 | // Otherwise, simply assign the computed energy. |
| 539 | Weights[i] = Inputs[i]->Energy; |
| 540 | } |
| 541 | |
| 542 | // If energy for all seeds is zero, fall back to vanilla schedule. |
| 543 | if (Weights[i] > 0.0) |
| 544 | VanillaSchedule = false; |
| 545 | } |
| 546 | } |
| 547 | |
| 548 | if (VanillaSchedule) { |
| 549 | for (size_t i = 0; i < N; i++) |
| 550 | Weights[i] = |
| 551 | Inputs[i]->NumFeatures |
| 552 | ? static_cast<double>((i + 1) * |
| 553 | (Inputs[i]->HasFocusFunction ? 1000 : 1)) |
| 554 | : 0.; |
| 555 | } |
| 556 | |
| 557 | if (FeatureDebug) { |
| 558 | for (size_t i = 0; i < N; i++) |
| 559 | Printf(Fmt: "%zd " , Inputs[i]->NumFeatures); |
| 560 | Printf(Fmt: "SCORE\n" ); |
| 561 | for (size_t i = 0; i < N; i++) |
| 562 | Printf(Fmt: "%f " , Weights[i]); |
| 563 | Printf(Fmt: "Weights\n" ); |
| 564 | } |
| 565 | CorpusDistribution = std::piecewise_constant_distribution<double>( |
| 566 | Intervals.begin(), Intervals.end(), Weights.begin()); |
| 567 | } |
| 568 | std::piecewise_constant_distribution<double> CorpusDistribution; |
| 569 | |
| 570 | std::vector<double> Intervals; |
| 571 | std::vector<double> Weights; |
| 572 | |
| 573 | std::unordered_set<std::string> Hashes; |
| 574 | std::vector<InputInfo *> Inputs; |
| 575 | |
| 576 | size_t NumAddedFeatures = 0; |
| 577 | size_t NumUpdatedFeatures = 0; |
| 578 | uint32_t InputSizesPerFeature[kFeatureSetSize]; |
| 579 | uint32_t SmallestElementPerFeature[kFeatureSetSize]; |
| 580 | |
| 581 | bool DistributionNeedsUpdate = true; |
| 582 | uint16_t FreqOfMostAbundantRareFeature = 0; |
| 583 | uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {}; |
| 584 | std::vector<uint32_t> RareFeatures; |
| 585 | std::bitset<kFeatureSetSize> IsRareFeature; |
| 586 | |
| 587 | std::string OutputCorpus; |
| 588 | }; |
| 589 | |
| 590 | } // namespace fuzzer |
| 591 | |
| 592 | #endif // LLVM_FUZZER_CORPUS |
| 593 | |