kbobyrev updated this revision to Diff 161204.
kbobyrev added a comment.

Add a couple of tests, fix formatting issues, and use `__builtin_trap()` instead
of `assert` in the fuzzer so that it's more transparent.

Also, fuzzing this unreadable version for a couple of hours suggests that it is 



Index: llvm/unittests/Support/YAMLIOTest.cpp
--- llvm/unittests/Support/YAMLIOTest.cpp
+++ llvm/unittests/Support/YAMLIOTest.cpp
@@ -16,16 +16,17 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+using llvm::yaml::Hex16;
+using llvm::yaml::Hex32;
+using llvm::yaml::Hex64;
+using llvm::yaml::Hex8;
 using llvm::yaml::Input;
-using llvm::yaml::Output;
 using llvm::yaml::IO;
-using llvm::yaml::MappingTraits;
+using llvm::yaml::isNumeric;
 using llvm::yaml::MappingNormalization;
+using llvm::yaml::MappingTraits;
+using llvm::yaml::Output;
 using llvm::yaml::ScalarTraits;
-using llvm::yaml::Hex8;
-using llvm::yaml::Hex16;
-using llvm::yaml::Hex32;
-using llvm::yaml::Hex64;
 using ::testing::StartsWith;
@@ -2569,3 +2570,73 @@
     TestEscaped((char const *)foobar, "\"foo\\u200Bbar\"");
+TEST(YAMLIO, Numeric) {
+  EXPECT_TRUE(isNumeric(".inf"));
+  EXPECT_TRUE(isNumeric(".INF"));
+  EXPECT_TRUE(isNumeric(".Inf"));
+  EXPECT_TRUE(isNumeric("-.inf"));
+  EXPECT_TRUE(isNumeric("+.inf"));
+  EXPECT_TRUE(isNumeric(".nan"));
+  EXPECT_TRUE(isNumeric(".NaN"));
+  EXPECT_TRUE(isNumeric(".NAN"));
+  EXPECT_TRUE(isNumeric("0"));
+  EXPECT_TRUE(isNumeric("0."));
+  EXPECT_TRUE(isNumeric("0.0"));
+  EXPECT_TRUE(isNumeric("-0.0"));
+  EXPECT_TRUE(isNumeric("+0.0"));
+  EXPECT_TRUE(isNumeric("12345"));
+  EXPECT_TRUE(isNumeric("012345"));
+  EXPECT_TRUE(isNumeric("+12.0"));
+  EXPECT_TRUE(isNumeric(".5"));
+  EXPECT_TRUE(isNumeric("+.5"));
+  EXPECT_TRUE(isNumeric("-1.0"));
+  EXPECT_TRUE(isNumeric("2.3e4"));
+  EXPECT_TRUE(isNumeric("-2E+05"));
+  EXPECT_TRUE(isNumeric("+12e03"));
+  EXPECT_TRUE(isNumeric("6.8523015e+5"));
+  EXPECT_TRUE(isNumeric("1.e+1"));
+  EXPECT_TRUE(isNumeric(".0e+1"));
+  EXPECT_TRUE(isNumeric("0x2aF3"));
+  EXPECT_TRUE(isNumeric("0o01234567"));
+  EXPECT_FALSE(isNumeric("not a number"));
+  EXPECT_FALSE(isNumeric("."));
+  EXPECT_FALSE(isNumeric(".e+1"));
+  EXPECT_FALSE(isNumeric(".1e"));
+  EXPECT_FALSE(isNumeric(".1e+"));
+  EXPECT_FALSE(isNumeric(".1e++1"));
+  EXPECT_FALSE(isNumeric("ABCD"));
+  EXPECT_FALSE(isNumeric("+0x2AF3"));
+  EXPECT_FALSE(isNumeric("-0x2AF3"));
+  EXPECT_FALSE(isNumeric("0x2AF3Z"));
+  EXPECT_FALSE(isNumeric("0o012345678"));
+  EXPECT_FALSE(isNumeric("0xZ"));
+  EXPECT_FALSE(isNumeric("-0o012345678"));
+  EXPECT_FALSE(isNumeric("000003A8229434B839616A25C16B0291F77A438B"));
+  EXPECT_FALSE(isNumeric(""));
+  EXPECT_FALSE(isNumeric("."));
+  EXPECT_FALSE(isNumeric(".e+1"));
+  EXPECT_FALSE(isNumeric(".e+"));
+  EXPECT_FALSE(isNumeric(".e"));
+  EXPECT_FALSE(isNumeric("e1"));
+  // Deprecated formats: as of the YAML 1.2 specification, the following are
+  // no longer valid numbers:
+  //
+  // * Sexagesimal numbers
+  // * Decimal numbers with comma as the delimiter
+  // * "inf", "nan" without '.' prefix
+  EXPECT_FALSE(isNumeric("3:25:45"));
+  EXPECT_FALSE(isNumeric("+12,345"));
+  EXPECT_FALSE(isNumeric("-inf"));
+  EXPECT_FALSE(isNumeric("1,230.15"));
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
@@ -0,0 +1,47 @@
+//===--- yaml-numeric-parser-fuzzer.cpp - Fuzzer for YAML numeric parser --===//
+//                     The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cassert>
+#include <string>
+llvm::Regex Infinity("^[-+]?(\\.inf|\\.Inf|\\.INF)$");
+llvm::Regex Base8("^0o[0-7]+$");
+llvm::Regex Base16("^0x[0-9a-fA-F]+$");
+llvm::Regex Float("^[-+]?(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
+inline bool isNumericRegex(llvm::StringRef S) {
+  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
+    return true;
+  if (Infinity.match(S))
+    return true;
+  if (Base8.match(S))
+    return true;
+  if (Base16.match(S))
+    return true;
+  if (Float.match(S))
+    return true;
+  return false;
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  std::string Input(reinterpret_cast<const char *>(Data), Size);
+  Input.erase(std::remove(Input.begin(), Input.end(), 0), Input.end());
+  if (!Input.empty() && llvm::yaml::isNumeric(Input) != isNumericRegex(Input))
+      __builtin_trap();
+  return 0;
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
@@ -0,0 +1,19 @@
+//===--- DummyYAMLNumericParserFuzzer.cpp ---------------------------------===//
+//                     The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// Implementation of main so we can build and test without linking libFuzzer.
+#include "llvm/FuzzMutate/FuzzerCLI.h"
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+int main(int argc, char *argv[]) {
+  return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput);
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
@@ -0,0 +1,9 @@
+  Support
+  FuzzMutate
+  yaml-numeric-parser-fuzzer.cpp
+  DUMMY_MAIN DummyYAMLNumericParserFuzzer.cpp
+  )
Index: llvm/include/llvm/Support/YAMLTraits.h
--- llvm/include/llvm/Support/YAMLTraits.h
+++ llvm/include/llvm/Support/YAMLTraits.h
@@ -27,13 +27,15 @@
 #include <cctype>
 #include <cstddef>
 #include <cstdint>
+#include <iterator>
 #include <map>
 #include <memory>
 #include <new>
 #include <string>
 #include <system_error>
 #include <type_traits>
 #include <vector>
+#include <cassert>
 namespace llvm {
 namespace yaml {
@@ -449,46 +451,101 @@
   static bool const value = (sizeof(test<DocumentListTraits<T>>(nullptr))==1);
-inline bool isNumber(StringRef S) {
-  static const char OctalChars[] = "01234567";
-  if (S.startswith("0") &&
-      S.drop_front().find_first_not_of(OctalChars) == StringRef::npos)
-    return true;
+inline StringRef skipDigits(StringRef S) {
+  size_t Digits = 0;
+  while (Digits != S.size() && std::strchr("0123456789", S[Digits]) != nullptr)
+    ++Digits;
+  return S.drop_front(Digits);
-  if (S.startswith("0o") &&
-      S.drop_front(2).find_first_not_of(OctalChars) == StringRef::npos)
-    return true;
+inline bool isNumeric(StringRef S) {
+  // Make S.front() and S.drop_front().front() (if S.front() is [+-]) calls
+  // safe.
+  if (S.empty() || S.equals("+") || S.equals("-"))
+    return false;
-  static const char HexChars[] = "0123456789abcdefABCDEF";
-  if (S.startswith("0x") &&
-      S.drop_front(2).find_first_not_of(HexChars) == StringRef::npos)
+  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
     return true;
-  static const char DecChars[] = "0123456789";
-  if (S.find_first_not_of(DecChars) == StringRef::npos)
-    return true;
+  // Infinity and decimal numbers can be prefixed with sign.
+  StringRef Tail = (S.front() == '-' || S.front() == '+') ? S.drop_front() : S;
-  if (S.equals(".inf") || S.equals(".Inf") || S.equals(".INF"))
+  // Check for infinity first, because checking for hex and oct numbers is more
+  // expensive.
+  if (Tail.equals(".inf") || Tail.equals(".Inf") || Tail.equals(".INF"))
     return true;
-  Regex FloatMatcher("^(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
-  if (FloatMatcher.match(S))
-    return true;
+  // Section 10.3.2 Tag Resolution
+  // YAML 1.2 Specification prohibits Base 8 and Base 16 numbers prefixed with
+  // [-+], so S should be used instead of Tail.
+  if (S.startswith("0o"))
+    return S.size() > 2 &&
+           S.drop_front(2).find_first_not_of("01234567") == StringRef::npos;
+  if (S.startswith("0x"))
+    return S.size() > 2 &&
+           S.drop_front(2).find_first_not_of("0123456789abcdefABCDEF") ==
+             StringRef::npos;
+  // Parse float: [-+]? (\. [0-9]+ | [0-9]+ (\. [0-9]* )?) ([eE] [-+]? [0-9]+)?
+  S = Tail;
+  // Handle cases when the number starts with '.' and hence needs at least one
+  // digit after the dot (as opposed to a number which has digits before the
+  // dot), but doesn't have one.
+  if (S.startswith(".") &&
+      (S.equals(".") || (S.size() > 1 && std::strchr("0123456789",
+                                                     S[1]) == nullptr)))
+    return false;
+  if (S.startswith("E") || S.startswith("e"))
+    return false;
+  enum ParseState {
+    Default,
+    FoundDot,
+    FoundExponent,
+  };
+  ParseState State = Default;
-  return false;
+  S = skipDigits(S);
-inline bool isNumeric(StringRef S) {
-  if ((S.front() == '-' || S.front() == '+') && isNumber(S.drop_front()))
+  // Accept decimal integer.
+  if (S.empty()) {
     return true;
+  } else if (S.front() == '.') {
+    State = FoundDot;
+    S = S.drop_front();
+  } else if (S.front() == 'e' || S.front() == 'E') {
+    State = FoundExponent;
+    S = S.drop_front();
+  } else {
+    return false;
+  }
-  if (isNumber(S))
-    return true;
+  if (State == FoundDot) {
+    S = skipDigits(S);
+    if (S.empty()) {
+      return true;
+    } else if (S.front() == 'e' || S.front() == 'E') {
+      State = FoundExponent;
+      S = S.drop_front();
+    } else {
+      return false;
+    }
+  }
-  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
-    return true;
+  assert(FoundExponent && "Should have found exponent at this point.");
+  if (S.empty())
+    return false;
+  if (S.front() == '+' || S.front() == '-') {
+    S = S.drop_front();
+    if (S.empty())
+      return false;
+  }
-  return false;
+  return skipDigits(S).empty();
 inline bool isNull(StringRef S) {
cfe-commits mailing list

Reply via email to