Author: [EMAIL PROTECTED]
Date: Fri Oct 31 06:44:56 2008
New Revision: 674

Modified:
    branches/experimental/regexp2000/src/ast.cc
    branches/experimental/regexp2000/src/ast.h
    branches/experimental/regexp2000/src/jsregexp.cc
    branches/experimental/regexp2000/src/jsregexp.h
    branches/experimental/regexp2000/src/objects-debug.cc
    branches/experimental/regexp2000/src/objects-inl.h
    branches/experimental/regexp2000/src/objects.h
    branches/experimental/regexp2000/src/parser.cc

Log:
Use new RegExp parser for syntax checking patterns and selecting atom matching.
Delay compilation of JSCRE regexps until their first use.
Includes previous change-list, which got committed to the wrong branch.


Modified: branches/experimental/regexp2000/src/ast.cc
==============================================================================
--- branches/experimental/regexp2000/src/ast.cc (original)
+++ branches/experimental/regexp2000/src/ast.cc Fri Oct 31 06:44:56 2008
@@ -190,6 +190,19 @@
  FOR_EACH_REG_EXP_NODE_TYPE(MAKE_ACCEPT)
  #undef MAKE_ACCEPT

+#define MAKE_CONVERSION(Name)                                        \
+  RegExp##Name* RegExpTree::As##Name() {                             \
+    return NULL;                                                     \
+  }
+  FOR_EACH_REG_EXP_NODE_TYPE(MAKE_CONVERSION)
+#undef MAKE_CONVERSION
+
+#define MAKE_CONVERSION(Name)                                       \
+  RegExp##Name* RegExp##Name::As##Name() {                          \
+    return this;                                                    \
+  }
+FOR_EACH_REG_EXP_NODE_TYPE(MAKE_CONVERSION)
+#undef MAKE_CONVERSION

  RegExpEmpty RegExpEmpty::kInstance;


Modified: branches/experimental/regexp2000/src/ast.h
==============================================================================
--- branches/experimental/regexp2000/src/ast.h  (original)
+++ branches/experimental/regexp2000/src/ast.h  Fri Oct 31 06:44:56 2008
@@ -1215,6 +1215,9 @@
    virtual ~RegExpTree() { }
    virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
    SmartPointer<char> ToString();
+#define MAKE_ASTYPE(Name)  virtual RegExp##Name* As##Name();
+  FOR_EACH_REG_EXP_NODE_TYPE(MAKE_ASTYPE)
+#undef MAKE_ASTYPE
  };


@@ -1222,6 +1225,7 @@
   public:
    explicit RegExpDisjunction(ZoneList<RegExpTree*>* nodes) : nodes_(nodes)  
{ }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpDisjunction* AsDisjunction();
    ZoneList<RegExpTree*>* nodes() { return nodes_; }
   private:
    ZoneList<RegExpTree*>* nodes_;
@@ -1232,6 +1236,7 @@
   public:
    explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes)  
{ }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpAlternative* AsAlternative();
    ZoneList<RegExpTree*>* nodes() { return nodes_; }
   private:
    ZoneList<RegExpTree*>* nodes_;
@@ -1246,6 +1251,7 @@
    };
    explicit RegExpAssertion(Type type) : type_(type) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpAssertion* AsAssertion();
    Type type() { return type_; }
   private:
    Type type_;
@@ -1298,6 +1304,7 @@
      : ranges_(ranges),
        is_negated_(is_negated) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpCharacterClass* AsCharacterClass();
    ZoneList<CharacterRange>* ranges() { return ranges_; }
    bool is_negated() { return is_negated_; }
   private:
@@ -1310,6 +1317,7 @@
   public:
    explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpAtom* AsAtom();
    Vector<const uc16> data() { return data_; }
   private:
    Vector<const uc16> data_;
@@ -1324,6 +1332,7 @@
        is_greedy_(is_greedy),
        body_(body) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpQuantifier* AsQuantifier();
    int min() { return min_; }
    int max() { return max_; }
    bool is_greedy() { return is_greedy_; }
@@ -1344,6 +1353,7 @@
    explicit RegExpCapture(RegExpTree* body)
      : body_(body) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpCapture* AsCapture();
    RegExpTree* body() { return body_; }
   private:
    RegExpTree* body_;
@@ -1356,6 +1366,7 @@
      : body_(body),
        is_positive_(is_positive) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpLookahead* AsLookahead();
    RegExpTree* body() { return body_; }
    bool is_positive() { return is_positive_; }
   private:
@@ -1368,6 +1379,7 @@
   public:
    explicit RegExpBackreference(int index) : index_(index) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpBackreference* AsBackreference();
    int index() { return index_; }
   private:
    int index_;
@@ -1378,6 +1390,7 @@
   public:
    RegExpEmpty() { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpEmpty* AsEmpty();
    static RegExpEmpty* GetInstance() { return &kInstance; }
   private:
    static RegExpEmpty kInstance;

Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc    (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc    Fri Oct 31 06:44:56 2008
@@ -38,6 +38,7 @@
  #include "top.h"
  #include "compilation-cache.h"
  #include "string-stream.h"
+#include "parser.h"

  // Including pcre.h undefines DEBUG to avoid getting debug output from
  // the JSCRE implementation. Make sure to redefine it in debug mode
@@ -176,7 +177,16 @@
  }


-unibrow::Predicate<unibrow::RegExpSpecialChar, 128>  
is_reg_exp_special_char;
+static inline Handle<Object> CreateRegExpException(Handle<JSRegExp> re,
+                                                   Handle<String> pattern,
+                                                   Handle<String>  
error_text,
+                                                   const char* message) {
+  Handle<JSArray> array = Factory::NewJSArray(2);
+  SetElement(array, 0, pattern);
+  SetElement(array, 1, error_text);
+  Handle<Object> regexp_err = Factory::NewSyntaxError(message, array);
+  return Handle<Object>(Top::Throw(*regexp_err));
+}


  Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
@@ -190,15 +200,21 @@
      re->set_data(*cached);
      result = re;
    } else {
-    bool is_atom = !flags.is_ignore_case();
-    for (int i = 0; is_atom && i < pattern->length(); i++) {
-      if (is_reg_exp_special_char.get(pattern->Get(i)))
-        is_atom = false;
-    }
-    if (is_atom) {
-      result = AtomCompile(re, pattern, flags);
+    SafeStringInputBuffer buffer(pattern.location());
+    Handle<String> error_text;
+    RegExpTree* ast = ParseRegExp(&buffer, &error_text);
+    if (!error_text.is_null()) {
+      // Throw an exception if we fail to parse the pattern.
+      return CreateRegExpException(re, pattern,  
error_text, "malformed_regexp");
+    }
+    RegExpAtom* atom = ast->AsAtom();
+    if (atom != NULL && !flags.is_ignore_case()) {
+      Vector<const uc16> atom_pattern = atom->data();
+      // Test if pattern equals atom_pattern and reuse pattern if it does.
+      Handle<String> atom_string =  
Factory::NewStringFromTwoByte(atom_pattern);
+      result = AtomCompile(re, atom_string, flags);
      } else {
-      result = JsreCompile(re, pattern, flags);
+      result = JsrePrepare(re, pattern, flags);
      }
      Object* data = re->data();
      if (data->IsFixedArray()) {
@@ -311,9 +327,22 @@
  }


-Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re,
-                                       Handle<String> pattern,
-                                       JSRegExp::Flags flags) {
+Handle<Object>RegExpImpl::JsrePrepare(Handle<JSRegExp> re,
+                                      Handle<String> pattern,
+                                      JSRegExp::Flags flags) {
+  Handle<Object> value(Heap::undefined_value());
+  Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
+  return re;
+}
+
+
+Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re) {
+  ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
+  ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());
+
+  Handle<String> pattern(re->Pattern());
+  JSRegExp::Flags flags = re->GetFlags();
+
    JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
      ? JSRegExpIgnoreCase
      : JSRegExpDoNotIgnoreCase;
@@ -477,6 +506,13 @@
  Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,
                                      Handle<String> subject,
                                      Handle<Object> index) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
+  if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
+    Handle<Object> compile_result = JsreCompile(regexp);
+    if (compile_result->IsException()) return compile_result;
+  }
+  ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
+
    // Prepare space for the return values.
    int num_captures = JsreCapture(regexp);

@@ -497,6 +533,13 @@

  Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
                                            Handle<String> subject) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
+  if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
+    Handle<Object> compile_result = JsreCompile(regexp);
+    if (compile_result->IsException()) return compile_result;
+  }
+  ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
+
    // Prepare space for the return values.
    int num_captures = JsreCapture(regexp);

@@ -898,7 +941,7 @@

  StaticCharacterClasses::StaticCharacterClasses() {
  #define MAKE_CLASS(Name)\
-  CharacterClass::Ranges(Vector<CharacterClass::Range>(k##Name##Ranges,\
+  CharacterClass::Ranges(Vector<CharacterClass::Range>(k##Name##Ranges, \
                                                          
k##Name##RangeCount), \
                           &static_allocator_)


Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h     (original)
+++ branches/experimental/regexp2000/src/jsregexp.h     Fri Oct 31 06:44:56 2008
@@ -61,10 +61,19 @@
    static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp,
                                     Handle<String> subject);

-  static Handle<Object> AtomCompile(Handle<JSRegExp> re,
+  // Stores an uncompiled RegExp pattern in the JSRegExp object.
+  // It will be compiled by JSCRE when first executed.
+  static Handle<Object> JsrePrepare(Handle<JSRegExp> re,
                                      Handle<String> pattern,
                                      JSRegExp::Flags flags);

+  // Compile the pattern using JSCRE and store the result in the
+  // JSRegExp object.
+  static Handle<Object> JsreCompile(Handle<JSRegExp> re);
+
+  static Handle<Object> AtomCompile(Handle<JSRegExp> re,
+                                    Handle<String> pattern,
+                                    JSRegExp::Flags flags);
    static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
                                   Handle<String> subject,
                                   Handle<Object> index);
@@ -76,6 +85,7 @@
                                      Handle<String> pattern,
                                      JSRegExp::Flags flags);

+  // Execute a compiled JSCRE pattern.
    static Handle<Object> JsreExec(Handle<JSRegExp> regexp,
                                   Handle<String> subject,
                                   Handle<Object> index);
@@ -194,7 +204,7 @@
    static const int kFieldMax = (1 << kFieldWidth);
    static const int kSegmentMask = (1 << kFieldWidth) - 1;
    static const int kNibbleCount = kFieldMax / 4;
-  STATIC_ASSERT(kFieldMax == 8 * sizeof(uint64_t));
+  STATIC_CHECK(kFieldMax == 8 * sizeof(uint64_t));

    Type type() { return type_; }

@@ -218,7 +228,7 @@
  };


-STATIC_ASSERT(sizeof(CharacterClass) == 3 * kIntSize);
+STATIC_CHECK(sizeof(CharacterClass) == 3 * kIntSize);


  class CharacterClassAllocator {

Modified: branches/experimental/regexp2000/src/objects-debug.cc
==============================================================================
--- branches/experimental/regexp2000/src/objects-debug.cc       (original)
+++ branches/experimental/regexp2000/src/objects-debug.cc       Fri Oct 31 06:44:56 2008
@@ -669,7 +669,8 @@
      }
      case JSRegExp::JSCRE: {
        FixedArray* arr = FixedArray::cast(data());
-      ASSERT(arr->get(JSRegExp::kJscreDataIndex)->IsFixedArray());
+      Object* jscre_data = arr->get(JSRegExp::kJscreDataIndex);
+      ASSERT(jscre_data->IsFixedArray() || jscre_data->IsUndefined());
        break;
      }
      default:

Modified: branches/experimental/regexp2000/src/objects-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/objects-inl.h  (original)
+++ branches/experimental/regexp2000/src/objects-inl.h  Fri Oct 31 06:44:56 2008
@@ -2149,6 +2149,22 @@
  }


+JSRegExp::Flags JSRegExp::GetFlags() {
+  ASSERT(this->data()->IsFixedArray());
+  Object* data = this->data();
+  Smi* smi = Smi::cast(FixedArray::cast(data)->get(kFlagsIndex));
+  return Flags(smi->value());
+}
+
+
+String* JSRegExp::Pattern() {
+  ASSERT(this->data()->IsFixedArray());
+  Object* data = this->data();
+  String* pattern= String::cast(FixedArray::cast(data)->get(kSourceIndex));
+  return pattern;
+}
+
+
  Object* JSRegExp::DataAt(int index) {
    ASSERT(TypeTag() != NOT_COMPILED);
    return FixedArray::cast(data())->get(index);

Modified: branches/experimental/regexp2000/src/objects.h
==============================================================================
--- branches/experimental/regexp2000/src/objects.h      (original)
+++ branches/experimental/regexp2000/src/objects.h      Fri Oct 31 06:44:56 2008
@@ -2915,6 +2915,10 @@
  // Regular expressions
  class JSRegExp: public JSObject {
   public:
+  // Meaning of Type:
+  // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp  
yet.
+  // JSCRE: A complex RegExp for JSCRE
+  // ATOM: A simple string to match against using an indexOf operation.
    enum Type { NOT_COMPILED, JSCRE, ATOM };
    enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };

@@ -2932,6 +2936,8 @@
    DECL_ACCESSORS(data, Object)

    inline Type TypeTag();
+  inline Flags GetFlags();
+  inline String* Pattern();
    inline Object* DataAt(int index);

    static inline JSRegExp* cast(Object* obj);

Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc      (original)
+++ branches/experimental/regexp2000/src/parser.cc      Fri Oct 31 06:44:56 2008
@@ -3820,6 +3820,7 @@
  RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
    static const char* kUnterminated = "Unterminated character class";
    static const char* kIllegal = "Illegal character class";
+  static const char* kRangeOutOfOrder = "Range out of order in character  
class";

    ASSERT_EQ(current(), '[');
    Advance();
@@ -3840,6 +3841,9 @@
          CharacterRange next = ParseClassAtom(CHECK_OK);
          if (next.is_character_class()) {
            return ReportError(CStrVector(kIllegal), CHECK_OK);
+        }
+        if (first.from() > next.to()) {
+          return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);
          }
          ranges->Add(CharacterRange::Range(first.from(), next.to()));
        } else {

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to