[PATCH] D50839: [llvm] Optimize YAML::isNumeric

Kirill Bobyrev via Phabricator via cfe-commits Thu, 16 Aug 2018 08:03:31 -0700

kbobyrev updated this revision to Diff 161033.
kbobyrev added a comment.

Use consistent `Regex` matchers naming: don't append "Matcher" at the end.



https://reviews.llvm.org/D50839

Files:
  llvm/include/llvm/Support/YAMLTraits.h
  llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
  llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
  llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
  llvm/unittests/Support/YAMLIOTest.cpp

Index: llvm/unittests/Support/YAMLIOTest.cpp
===================================================================
--- llvm/unittests/Support/YAMLIOTest.cpp
+++ llvm/unittests/Support/YAMLIOTest.cpp
@@ -16,16 +16,17 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+using llvm::yaml::Hex16;
+using llvm::yaml::Hex32;
+using llvm::yaml::Hex64;
+using llvm::yaml::Hex8;
 using llvm::yaml::Input;
-using llvm::yaml::Output;
 using llvm::yaml::IO;
-using llvm::yaml::MappingTraits;
+using llvm::yaml::isNumeric;
 using llvm::yaml::MappingNormalization;
+using llvm::yaml::MappingTraits;
+using llvm::yaml::Output;
 using llvm::yaml::ScalarTraits;
-using llvm::yaml::Hex8;
-using llvm::yaml::Hex16;
-using llvm::yaml::Hex32;
-using llvm::yaml::Hex64;
 using ::testing::StartsWith;
 
 
@@ -2569,3 +2570,64 @@
     TestEscaped((char const *)foobar, "\"foo\\u200Bbar\"");
   }
 }
+
+TEST(YAMLIO, Numeric) {
+  EXPECT_TRUE(isNumeric(".inf"));
+  EXPECT_TRUE(isNumeric(".INF"));
+  EXPECT_TRUE(isNumeric(".Inf"));
+  EXPECT_TRUE(isNumeric("-.inf"));
+  EXPECT_TRUE(isNumeric("+.inf"));
+
+  EXPECT_TRUE(isNumeric(".nan"));
+  EXPECT_TRUE(isNumeric(".NaN"));
+  EXPECT_TRUE(isNumeric(".NAN"));
+
+  EXPECT_TRUE(isNumeric("0"));
+  EXPECT_TRUE(isNumeric("0."));
+  EXPECT_TRUE(isNumeric("0.0"));
+  EXPECT_TRUE(isNumeric("-0.0"));
+  EXPECT_TRUE(isNumeric("+0.0"));
+
+  EXPECT_TRUE(isNumeric("12345"));
+  EXPECT_TRUE(isNumeric("012345"));
+  EXPECT_TRUE(isNumeric("+12.0"));
+  EXPECT_TRUE(isNumeric(".5"));
+  EXPECT_TRUE(isNumeric("+.5"));
+  EXPECT_TRUE(isNumeric("-1.0"));
+
+  EXPECT_TRUE(isNumeric("2.3e4"));
+  EXPECT_TRUE(isNumeric("-2E+05"));
+  EXPECT_TRUE(isNumeric("+12e03"));
+  EXPECT_TRUE(isNumeric("6.8523015e+5"));
+
+  EXPECT_TRUE(isNumeric("1.e+1"));
+  EXPECT_TRUE(isNumeric(".0e+1"));
+
+  EXPECT_TRUE(isNumeric("0x2aF3"));
+  EXPECT_TRUE(isNumeric("0o01234567"));
+
+  EXPECT_FALSE(isNumeric("not a number"));
+  EXPECT_FALSE(isNumeric("."));
+  EXPECT_FALSE(isNumeric(".e+1"));
+  EXPECT_FALSE(isNumeric(".1e"));
+  EXPECT_FALSE(isNumeric(".1e+"));
+  EXPECT_FALSE(isNumeric(".1e++1"));
+
+  EXPECT_FALSE(isNumeric("+0x2AF3"));
+  EXPECT_FALSE(isNumeric("-0x2AF3"));
+  EXPECT_FALSE(isNumeric("0x2AF3Z"));
+  EXPECT_FALSE(isNumeric("0o012345678"));
+  EXPECT_FALSE(isNumeric("-0o012345678"));
+  EXPECT_FALSE(isNumeric("000003A8229434B839616A25C16B0291F77A438B"));
+
+  // Deprecated formats: as of the YAML 1.2 specification, the following are
+  // no longer valid numbers:
+  //
+  // * Sexagesimal numbers
+  // * Decimal numbers with comma as the delimiter
+  // * "inf", "nan" without '.' prefix
+  EXPECT_FALSE(isNumeric("3:25:45"));
+  EXPECT_FALSE(isNumeric("+12,345"));
+  EXPECT_FALSE(isNumeric("-inf"));
+  EXPECT_FALSE(isNumeric("1,230.15"));
+}
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
===================================================================
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp
@@ -0,0 +1,44 @@
+//===--- yaml-numeric-parser-fuzzer.cpp - Fuzzer for YAML isNumeric -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cassert>
+#include <string>
+
+llvm::Regex Inifnity("^[-+]?(\\.inf|\\.Inf|\\.INF)$");
+llvm::Regex Base8("^0o[0-7]+$");
+llvm::Regex Base16("^0x[0-9a-fA-F]+$");
+llvm::Regex Float("^[-+]?(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
+
+inline bool isNumericRegex(llvm::StringRef S) {
+  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
+    return true;
+
+  if (Inifnity.match(S))
+    return true;
+
+  if (Base8.match(S))
+    return true;
+
+  if (Base16.match(S))
+    return true;
+
+  if (Float.match(S))
+    return true;
+
+  return false;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  std::string Input(reinterpret_cast<const char *>(Data), Size);
+  assert(llvm::yaml::isNumeric(Input) == isNumericRegex(Input));
+  return 0;
+}
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
===================================================================
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp
@@ -0,0 +1,19 @@
+//===--- DummyYAMLNumericParserFuzzer.cpp ----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of main so we can build and test without linking libFuzzer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/FuzzMutate/FuzzerCLI.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);
+int main(int argc, char *argv[]) {
+  return llvm::runFuzzerOnInputs(argc, argv, LLVMFuzzerTestOneInput);
+}
Index: llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
===================================================================
--- /dev/null
+++ llvm/tools/llvm-yaml-numeric-parser-fuzzer/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  FuzzMutate
+)
+
+add_llvm_fuzzer(llvm-yaml-numeric-parser-fuzzer
+  yaml-numeric-parser-fuzzer.cpp
+  DUMMY_MAIN DummyYAMLNumericParserFuzzer.cpp
+  )
Index: llvm/include/llvm/Support/YAMLTraits.h
===================================================================
--- llvm/include/llvm/Support/YAMLTraits.h
+++ llvm/include/llvm/Support/YAMLTraits.h
@@ -27,6 +27,7 @@
 #include <cctype>
 #include <cstddef>
 #include <cstdint>
+#include <iterator>
 #include <map>
 #include <memory>
 #include <new>
@@ -449,46 +450,69 @@
   static bool const value = (sizeof(test<DocumentListTraits<T>>(nullptr))==1);
 };
 
-inline bool isNumber(StringRef S) {
-  static const char OctalChars[] = "01234567";
-  if (S.startswith("0") &&
-      S.drop_front().find_first_not_of(OctalChars) == StringRef::npos)
-    return true;
-
-  if (S.startswith("0o") &&
-      S.drop_front(2).find_first_not_of(OctalChars) == StringRef::npos)
-    return true;
+inline bool isNumeric(StringRef S) {
+  if (S.empty())
+    return false;
 
-  static const char HexChars[] = "0123456789abcdefABCDEF";
-  if (S.startswith("0x") &&
-      S.drop_front(2).find_first_not_of(HexChars) == StringRef::npos)
+  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
     return true;
 
-  static const char DecChars[] = "0123456789";
-  if (S.find_first_not_of(DecChars) == StringRef::npos)
-    return true;
+  // Infinity and decimal numbers can be prefixed with sign.
+  StringRef Tail = (S.front() == '-' || S.front() == '+') ? S.drop_front() : S;
 
-  if (S.equals(".inf") || S.equals(".Inf") || S.equals(".INF"))
+  // Check for infinity first, because checking for hex and oct numbers is more
+  // expensive.
+  if (Tail.equals(".inf") || Tail.equals(".Inf") || Tail.equals(".INF"))
     return true;
 
-  Regex FloatMatcher("^(\\.[0-9]+|[0-9]+(\\.[0-9]*)?)([eE][-+]?[0-9]+)?$");
-  if (FloatMatcher.match(S))
+  bool ParseHex = S.startswith("0x");
+  bool ParseOct = S.startswith("0o");
+  if (ParseHex || ParseOct) {
+    if (S.size() < 3)
+      return false;
+    for (const auto &Char : S.drop_front(2)) {
+      if (ParseHex && std::strchr("0123456789abcdefABCDEF", Char) == nullptr)
+        return false;
+      if (ParseOct && std::strchr("01234567", Char) == nullptr)
+        return false;
+    }
     return true;
+  }
 
-  return false;
-}
-
-inline bool isNumeric(StringRef S) {
-  if ((S.front() == '-' || S.front() == '+') && isNumber(S.drop_front()))
-    return true;
+  static const char DecChars[] = "0123456789";
 
-  if (isNumber(S))
-    return true;
+  // Parse float: [-+]? (\. [0-9]+ | [0-9]+ (\. [0-9]* )?) ([eE] [-+]? [0-9]+)?
+  bool FoundDot = false;
+  bool FoundExponent = false;
+  for (size_t I = 0; I < Tail.size(); ++I) {
+    char Symbol = Tail[I];
+    if (Symbol == '.') {
+      // There can only be one dot in the number.
+      if (FoundDot)
+        return false;
+      FoundDot = true;
+      // If string starts with '.' it has to be followed by at least one digit.
+      if (I == 0 && (Tail.size() == 1 || Tail.find_first_of(DecChars) != 1))
+        return false;
+    } else if (Symbol == 'e' || Symbol == 'E') {
+      // There can only be one exponent sign in the number.
+      if (FoundExponent)
+        return false;
+      FoundExponent = true;
+    } else if (Symbol == '+' || Symbol == '-') {
+      // Sign can only follow an exponent sign.
+      if (!FoundExponent || (Tail[I - 1] != 'e' && Tail[I - 1] != 'E'))
+        return false;
+    } else if ('0' > Symbol || Symbol > '9') {
+      return false;
+    }
+  }
 
-  if (S.equals(".nan") || S.equals(".NaN") || S.equals(".NAN"))
-    return true;
+  // Exponent sign has been found: it should be followed by at least one digit.
+  if (FoundExponent)
+    return ('0' <= S.back() && S.back() <= '9');
 
-  return false;
+  return true;
 }
 
 inline bool isNull(StringRef S) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D50839: [llvm] Optimize YAML::isNumeric

Reply via email to