| 1 | //===-- yaml-parser-fuzzer.cpp - Fuzzer for YAML parser -------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/ADT/StringRef.h" |
| 10 | #include "llvm/Support/YAMLParser.h" |
| 11 | |
| 12 | using namespace llvm; |
| 13 | |
| 14 | static bool isValidYaml(const uint8_t *Data, size_t Size) { |
| 15 | SourceMgr SM; |
| 16 | yaml::Stream Stream(StringRef(reinterpret_cast<const char *>(Data), Size), |
| 17 | SM); |
| 18 | return Stream.validate(); |
| 19 | } |
| 20 | |
| 21 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { |
| 22 | std::vector<uint8_t> Input(Data, Data + Size); |
| 23 | |
| 24 | // Ensure we don't crash on any arbitrary byte string. |
| 25 | isValidYaml(Data: Input.data(), Size: Input.size()); |
| 26 | |
| 27 | // Ensure we don't crash on byte strings with no null characters. |
| 28 | llvm::erase(C&: Input, V: 0); |
| 29 | Input.shrink_to_fit(); |
| 30 | bool IsValidWithout0s = isValidYaml(Data: Input.data(), Size: Input.size()); |
| 31 | |
| 32 | // Ensure we don't crash on byte strings where the only null character is |
| 33 | // one-past-the-end of the actual input to the parser. |
| 34 | Input.push_back(x: 0); |
| 35 | Input.shrink_to_fit(); |
| 36 | bool IsValidWhen0Terminated = isValidYaml(Data: Input.data(), Size: Input.size() - 1); |
| 37 | |
| 38 | // Ensure we don't crash on byte strings with no null characters, but with |
| 39 | // an invalid character one-past-the-end of the actual input to the parser. |
| 40 | Input.back() = 1; |
| 41 | bool IsValidWhen1Terminated = isValidYaml(Data: Input.data(), Size: Input.size() - 1); |
| 42 | |
| 43 | // The parser should either accept all of these inputs, or reject all of |
| 44 | // them, because the parser sees an identical byte string in each case. This |
| 45 | // should hopefully catch some cases where the parser is sensitive to what is |
| 46 | // present one-past-the-end of the actual input. |
| 47 | if (IsValidWithout0s != IsValidWhen0Terminated || |
| 48 | IsValidWhen0Terminated != IsValidWhen1Terminated) |
| 49 | LLVM_BUILTIN_TRAP; |
| 50 | |
| 51 | return 0; |
| 52 | } |
| 53 | |