Author: [EMAIL PROTECTED]
Date: Mon Nov 17 03:28:48 2008
New Revision: 770

Modified:
    branches/experimental/regexp2000/src/ast.cc
    branches/experimental/regexp2000/src/ast.h
    branches/experimental/regexp2000/src/jsregexp.cc
    branches/experimental/regexp2000/src/jsregexp.h
    branches/experimental/regexp2000/src/parser.cc
    branches/experimental/regexp2000/test/cctest/test-regexp.cc

Log:
Characters in the ranges 0..31 and 128..255 are now printed as \xNN escapes,
and characters in the range 256..65535 are printed as \uNNNN escapes.
This allows tests written in C++ source files that use a simple codepage to
represent the expected output.
It also avoids output containing \0, which would end the string comparison
early.

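Also adds a small parser optimization for empty atoms and for quantifiers
applied to atoms that can only match the empty string (lookaheads and
assertions).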


Modified: branches/experimental/regexp2000/src/ast.cc
==============================================================================
--- branches/experimental/regexp2000/src/ast.cc (original)
+++ branches/experimental/regexp2000/src/ast.cc Mon Nov 17 03:28:48 2008
@@ -190,19 +190,21 @@
  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
  #undef MAKE_ACCEPT

-#define MAKE_CONVERSION(Name)                                        \
+#define MAKE_TYPE_CASE(Name)                                         \
    RegExp##Name* RegExpTree::As##Name() {                             \
      return NULL;                                                     \
-  }
-  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CONVERSION)
-#undef MAKE_CONVERSION
+  }                                                                  \
+  bool RegExpTree::Is##Name() { return false; }
+  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
+#undef MAKE_TYPE_CASE

-#define MAKE_CONVERSION(Name)                                       \
+#define MAKE_TYPE_CASE(Name)                                        \
    RegExp##Name* RegExp##Name::As##Name() {                          \
      return this;                                                    \
-  }
-FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CONVERSION)
-#undef MAKE_CONVERSION
+  }                                                                 \
+  bool RegExp##Name::Is##Name() { return true; }
+FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
+#undef MAKE_TYPE_CASE

  RegExpEmpty RegExpEmpty::kInstance;

@@ -252,12 +254,21 @@
    return NULL;
  }

+static void AddChar(StringStream* stream, uc16 character) {
+  if (character < 32 || (character >= 128 && character < 256)) {
+    stream->Add("\\x%02x", character);
+  } else if (character >= 256) {
+    stream->Add("\\u%04x", character);
+  } else {
+    stream->Add("%c", character);
+  }
+}

  void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
-  if (that.IsSingleton()) {
-    stream()->Add("%c", that.from());
-  } else {
-    stream()->Add("%c-%c", that.from(), that.to());
+  AddChar(stream(), that.from());
+  if (!that.IsSingleton()) {
+    stream()->Add("-");
+    AddChar(stream(), that.to());
    }
  }

@@ -303,7 +314,12 @@


  void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
-  stream()->Add("'%w'", that->data());
+  stream()->Add("'");
+  Vector<const uc16> chardata = that->data();
+  for (int i = 0; i < chardata.length(); i++) {
+    AddChar(stream(), chardata[i]);
+  }
+  stream()->Add("'");
    return NULL;
  }


Modified: branches/experimental/regexp2000/src/ast.h
==============================================================================
--- branches/experimental/regexp2000/src/ast.h  (original)
+++ branches/experimental/regexp2000/src/ast.h  Mon Nov 17 03:28:48 2008
@@ -1201,7 +1201,9 @@
    virtual bool IsTextElement() { return false; }
    virtual void AppendToText(RegExpText* text);
    SmartPointer<const char> ToString();
-#define MAKE_ASTYPE(Name)  virtual RegExp##Name* As##Name();
+#define MAKE_ASTYPE(Name)                                                   
\
+  virtual RegExp##Name* As##Name();                                         
\
+  virtual bool Is##Name();
    FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
  #undef MAKE_ASTYPE
  };
@@ -1216,6 +1218,7 @@
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
    virtual RegExpDisjunction* AsDisjunction();
+  virtual bool IsDisjunction();
    ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
   private:
    ZoneList<RegExpTree*>* alternatives_;
@@ -1230,6 +1233,7 @@
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
    virtual RegExpAlternative* AsAlternative();
+  virtual bool IsAlternative();
    ZoneList<RegExpTree*>* nodes() { return nodes_; }
   private:
    ZoneList<RegExpTree*>* nodes_;
@@ -1244,6 +1248,7 @@
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
    virtual RegExpText* AsText();
+  virtual bool IsText();
    virtual bool IsTextElement() { return true; }
    virtual void AppendToText(RegExpText* text);
    void AddElement(TextElement elm) { elements_.Add(elm); }
@@ -1265,6 +1270,7 @@
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
    virtual RegExpAssertion* AsAssertion();
+  virtual bool IsAssertion();
    Type type() { return type_; }
   private:
    Type type_;
@@ -1286,6 +1292,7 @@
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
    virtual RegExpCharacterClass* AsCharacterClass();
+  virtual bool IsCharacterClass();
    virtual bool IsTextElement() { return true; }
    virtual void AppendToText(RegExpText* text);
    ZoneList<CharacterRange>* ranges() { return ranges_; }
@@ -1304,6 +1311,7 @@
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
    virtual RegExpAtom* AsAtom();
+  virtual bool IsAtom();
    virtual bool IsTextElement() { return true; }
    virtual void AppendToText(RegExpText* text);
    Vector<const uc16> data() { return data_; }
@@ -1331,6 +1339,7 @@
                              RegExpNode* on_success,
                              RegExpNode* on_failure);
    virtual RegExpQuantifier* AsQuantifier();
+  virtual bool IsQuantifier();
    int min() { return min_; }
    int max() { return max_; }
    bool is_greedy() { return is_greedy_; }
@@ -1363,6 +1372,7 @@
                              RegExpNode* on_success,
                              RegExpNode* on_failure);
    virtual RegExpCapture* AsCapture();
+  virtual bool IsCapture();
    RegExpTree* body() { return body_; }
    int index() { return index_; }
    inline CaptureAvailability available() { return available_; }
@@ -1388,6 +1398,7 @@
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
    virtual RegExpLookahead* AsLookahead();
+  virtual bool IsLookahead();
    RegExpTree* body() { return body_; }
    bool is_positive() { return is_positive_; }
   private:
@@ -1405,6 +1416,7 @@
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
    virtual RegExpBackreference* AsBackreference();
+  virtual bool IsBackreference();
    int index() { return capture_->index(); }
    RegExpCapture* capture() { return capture_; }
   private:
@@ -1420,6 +1432,7 @@
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
    virtual RegExpEmpty* AsEmpty();
+  virtual bool IsEmpty();
    static RegExpEmpty* GetInstance() { return &kInstance; }
   private:
    static RegExpEmpty kInstance;

Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc    (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc    Mon Nov 17 03:28:48 2008
@@ -998,7 +998,6 @@
  void ChoiceNode::GenerateGuard(RegExpCompiler* compiler,
                                 Guard *guard,
                                 Label* on_failure) {
-
  }



Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h     (original)
+++ branches/experimental/regexp2000/src/jsregexp.h     Mon Nov 17 03:28:48 2008
@@ -421,7 +421,7 @@
   public:
    enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
    TextElement() : type(UNINITIALIZED) { }
-  TextElement(Type t) : type(t) { }
+  explicit TextElement(Type t) : type(t) { }
    static TextElement Atom(RegExpAtom* atom);
    static TextElement CharClass(RegExpCharacterClass* char_class);
    Type type;

Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc      (original)
+++ branches/experimental/regexp2000/src/parser.cc      Mon Nov 17 03:28:48 2008
@@ -387,6 +387,10 @@


  void RegExpBuilder::AddAtom(RegExpTree* term) {
+  if (term->IsEmpty()) {
+    AddEmpty();
+    return;
+  }
    if (term->IsTextElement()) {
      FlushCharacters();
      text_.Add(term);
@@ -470,6 +474,16 @@
    } else if (terms_.length() > 0) {
      ASSERT(last_added_ == ADD_ATOM);
      atom = terms_.RemoveLast();
+    if (atom->IsLookahead() || atom->IsAssertion()) {
+      // Guaranteed not to match a non-empty string.
+      // Assertion as an atom can happen as, e.g., (?:\b)
+      LAST(ADD_TERM);
+      if (min == 0) {
+        return;
+      }
+      terms_.Add(atom);
+      return;
+    }
    } else {
      // Only call immediately after adding an atom or character!
      UNREACHABLE();

Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Mon Nov 17 03:28:48 2008
@@ -96,8 +96,8 @@
    CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
    CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
    CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
-  CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'");
-  CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\nb' @b 'c')");
+   
CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
+  CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
    CHECK_PARSE_EQ("(?:foo)", "'foo'");
    CHECK_PARSE_EQ("(?: foo )", "' foo '");
    CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
@@ -106,8 +106,8 @@
    CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
    CHECK_PARSE_EQ("()", "(^ %)");
    CHECK_PARSE_EQ("(?=)", "(-> + %)");
-  CHECK_PARSE_EQ("[]", "^[\x00-\uffff]");
-  CHECK_PARSE_EQ("[^]", "[\x00-\uffff]");
+  CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]");   // Doesn't compile on windows
+  CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]");   // \uffff isn't in codepage  
1252
    CHECK_PARSE_EQ("[x]", "[x]");
    CHECK_PARSE_EQ("[xyz]", "[x y z]");
    CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
@@ -120,27 +120,28 @@
    CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
    CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
    CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
-  CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'");
+  CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
+                 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
    CHECK_PARSE_EQ("\\c!", "'c!'");
    CHECK_PARSE_EQ("\\c_", "'c_'");
    CHECK_PARSE_EQ("\\c~", "'c~'");
    CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
    CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
    CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #   
]");
-  CHECK_PARSE_EQ("\\0", "'\0'");
+  CHECK_PARSE_EQ("\\0", "'\\x00'");
    CHECK_PARSE_EQ("\\8", "'8'");
    CHECK_PARSE_EQ("\\9", "'9'");
-  CHECK_PARSE_EQ("\\11", "'\t'");
-  CHECK_PARSE_EQ("\\11a", "'\ta'");
-  CHECK_PARSE_EQ("\\011", "'\t'");
-  CHECK_PARSE_EQ("\\00011", "'\00011'");
-  CHECK_PARSE_EQ("\\118", "'\t8'");
+  CHECK_PARSE_EQ("\\11", "'\\x09'");
+  CHECK_PARSE_EQ("\\11a", "'\\x09a'");
+  CHECK_PARSE_EQ("\\011", "'\\x09'");
+  CHECK_PARSE_EQ("\\00011", "'\\x0011'");
+  CHECK_PARSE_EQ("\\118", "'\\x098'");
    CHECK_PARSE_EQ("\\111", "'I'");
    CHECK_PARSE_EQ("\\1111", "'I1'");
    CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
    CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
    CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
-  CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\x04')");
+  CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
    CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
                                 " (# 0 - g (<- 1)))");
    CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
@@ -148,25 +149,25 @@
    CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
                                 " (# 0 - g (<- 3)))");
    CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
-                               " (# 0 - g '\x04'))");
+                               " (# 0 - g '\\x04'))");
    CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
    CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
-              " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\x09')");
+              " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
    CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
    CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
    CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
-  CHECK_PARSE_EQ("\\1(a)", "(: '\x01' (^ 'a'))");
+  CHECK_PARSE_EQ("\\1(a)", "(: '\\x01' (^ 'a'))");
    CHECK_PARSE_EQ("(?!(a))\\1", "(-> - (^ 'a'))");
-  CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: '\x01' (^ 'a') (<-  
1)))");
-  CHECK_PARSE_EQ("[\\0]", "[\0]");
-  CHECK_PARSE_EQ("[\\11]", "[\t]");
-  CHECK_PARSE_EQ("[\\11a]", "[\t a]");
-  CHECK_PARSE_EQ("[\\011]", "[\t]");
-  CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]");
-  CHECK_PARSE_EQ("[\\118]", "[\t 8]");
+  CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: '\\x01' (^ 'a') (<-  
1)))");
+  CHECK_PARSE_EQ("[\\0]", "[\\x00]");
+  CHECK_PARSE_EQ("[\\11]", "[\\x09]");
+  CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
+  CHECK_PARSE_EQ("[\\011]", "[\\x09]");
+  CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
+  CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
    CHECK_PARSE_EQ("[\\111]", "[I]");
    CHECK_PARSE_EQ("[\\1111]", "[I 1]");
    CHECK_PARSE_EQ("\\x34", "'\x34'");

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to