Author: [EMAIL PROTECTED]
Date: Fri Nov 14 03:26:08 2008
New Revision: 753

Modified:
    branches/experimental/regexp2000/src/factory.cc
    branches/experimental/regexp2000/src/jsregexp.cc
    branches/experimental/regexp2000/src/jsregexp.h
    branches/experimental/regexp2000/src/objects-debug.cc
    branches/experimental/regexp2000/src/objects-inl.h
    branches/experimental/regexp2000/src/objects.h
    branches/experimental/regexp2000/test/cctest/test-regexp.cc

Log:
Wire Regexp2000 up to the normal JS RegExp object.
Review URL: http://codereview.chromium.org/10943

Modified: branches/experimental/regexp2000/src/factory.cc
==============================================================================
--- branches/experimental/regexp2000/src/factory.cc     (original)
+++ branches/experimental/regexp2000/src/factory.cc     Fri Nov 14 03:26:08 2008
@@ -706,8 +706,11 @@
    ASSERT(type != INVALID_TYPE);

    Handle<JSFunction> result =
-      Factory::NewFunction(Factory::empty_symbol(), type, instance_size,
-                           code, true);
+      Factory::NewFunction(Factory::empty_symbol(),
+                           type,
+                           instance_size,
+                           code,
+                           true);
    // Set class name.
    Handle<Object> class_name = Handle<Object>(obj->class_name());
    if (class_name->IsString()) {

Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc    (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc    Fri Nov 14 03:26:08 2008
@@ -40,7 +40,10 @@
  #include "compilation-cache.h"
  #include "string-stream.h"
  #include "parser.h"
+#include "assembler-re2k.h"
  #include "regexp-macro-assembler.h"
+#include "regexp-macro-assembler-re2k.h"
+#include "interpreter-re2k.h"

  // Including pcre.h undefines DEBUG to avoid getting debug output from
  // the JSCRE implementation. Make sure to redefine it in debug mode
@@ -56,9 +59,6 @@
  namespace v8 { namespace internal {


-#define CAPTURE_INDEX 0
-#define INTERNAL_INDEX 1
-
  static Failure* malloc_failure;

  static void* JSREMalloc(size_t size) {
@@ -229,7 +229,16 @@
          result = AtomCompile(re, pattern, flags, pattern);
        }
      } else {
-      result = JsrePrepare(re, pattern, flags);
+      RegExpNode* node = NULL;
+      Handle<FixedArray> re2k_data =
+          RegExpEngine::Compile(&parse_result,
+                                &node,
+                                flags.is_ignore_case());
+      if (re2k_data.is_null()) {
+        result = JscrePrepare(re, pattern, flags);
+      } else {
+        result = Re2kPrepare(re, pattern, flags, re2k_data);
+      }
      }
      Object* data = re->data();
      if (data->IsFixedArray()) {
@@ -250,9 +259,11 @@
                                  Handle<Object> index) {
    switch (regexp->TypeTag()) {
      case JSRegExp::JSCRE:
-      return JsreExec(regexp, subject, index);
+      return JscreExec(regexp, subject, index);
      case JSRegExp::ATOM:
        return AtomExec(regexp, subject, index);
+    case JSRegExp::RE2K:
+      return Re2kExec(regexp, subject, index);
      default:
        UNREACHABLE();
        return Handle<Object>();
@@ -264,9 +275,11 @@
                                  Handle<String> subject) {
    switch (regexp->TypeTag()) {
      case JSRegExp::JSCRE:
-      return JsreExecGlobal(regexp, subject);
+      return JscreExecGlobal(regexp, subject);
      case JSRegExp::ATOM:
        return AtomExecGlobal(regexp, subject);
+    case JSRegExp::RE2K:
+      return Re2kExecGlobal(regexp, subject);
      default:
        UNREACHABLE();
        return Handle<Object>();
@@ -298,12 +311,8 @@
    if (value == -1) return Factory::null_value();

    Handle<FixedArray> array = Factory::NewFixedArray(2);
-  array->set(0,
-             Smi::FromInt(value),
-             SKIP_WRITE_BARRIER);
-  array->set(1,
-             Smi::FromInt(value + needle->length()),
-             SKIP_WRITE_BARRIER);
+  array->set(0, Smi::FromInt(value));
+  array->set(1, Smi::FromInt(value + needle->length()));
    return Factory::NewJSArrayWithElements(array);
  }

@@ -327,12 +336,8 @@
      int end = value + needle_length;

      Handle<FixedArray> array = Factory::NewFixedArray(2);
-    array->set(0,
-               Smi::FromInt(value),
-               SKIP_WRITE_BARRIER);
-    array->set(1,
-               Smi::FromInt(end),
-               SKIP_WRITE_BARRIER);
+    array->set(0, Smi::FromInt(value));
+    array->set(1, Smi::FromInt(end));
      Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
      SetElement(result, match_count, pair);
      match_count++;
@@ -343,15 +348,24 @@
  }


-Handle<Object>RegExpImpl::JsrePrepare(Handle<JSRegExp> re,
-                                      Handle<String> pattern,
-                                      JSRegExp::Flags flags) {
+Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
+                                       Handle<String> pattern,
+                                       JSRegExp::Flags flags) {
    Handle<Object> value(Heap::undefined_value());
    Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
    return re;
  }


+Handle<Object>RegExpImpl::Re2kPrepare(Handle<JSRegExp> re,
+                                      Handle<String> pattern,
+                                      JSRegExp::Flags flags,
+                                      Handle<FixedArray> re2k_data) {
+  Factory::SetRegExpData(re, JSRegExp::RE2K, pattern, flags, re2k_data);
+  return re;
+}
+
+
  static inline Object* DoCompile(String* pattern,
                                  JSRegExp::Flags flags,
                                  unsigned* number_of_captures,
@@ -398,7 +412,7 @@
  }


-Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re) {
+Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
    ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
    ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());

@@ -435,26 +449,65 @@
    Handle<ByteArray> internal(
        ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));

-  Handle<FixedArray> value = Factory::NewFixedArray(2);
-  value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures));
-  value->set(INTERNAL_INDEX, *internal);
+  Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength);
+  value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures));
+  value->set(kJscreInternalIndex, *internal);
    Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);

    return re;
  }


-Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::Re2kExecOnce(Handle<JSRegExp> regexp,
                                          int num_captures,
                                          Handle<String> subject,
                                          int previous_index,
                                          const uc16* two_byte_subject,
                                          int* offsets_vector,
                                          int offsets_vector_length) {
+  bool rc;
+  {
+    for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
+      offsets_vector[i] = -1;
+    }
+
+    AssertNoAllocation a;
+
+    LOG(RegExpExecEvent(regexp, previous_index, subject));
+
+    Handle<ByteArray> byte_codes = Re2kCode(regexp);
+
+    rc = Re2kInterpreter::Match(byte_codes,
+                                subject,
+                                offsets_vector,
+                                previous_index);
+  }
+
+  if (!rc) {
+    return Factory::null_value();
+  }
+
+  Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
+  // The captures come in (start, end+1) pairs.
+  for (int i = 0; i < 2 * (num_captures+1); i += 2) {
+    array->set(i, Smi::FromInt(offsets_vector[i]));
+    array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
+  }
+  return Factory::NewJSArrayWithElements(array);
+}
+
+
+Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,
+                                         int num_captures,
+                                         Handle<String> subject,
+                                         int previous_index,
+                                         const uc16* two_byte_subject,
+                                         int* offsets_vector,
+                                         int offsets_vector_length) {
    int rc;
    {
      AssertNoAllocation a;
-    ByteArray* internal = JsreInternal(regexp);
+    ByteArray* internal = JscreInternal(regexp);
      const JscreRegExp* js_regexp =
          reinterpret_cast<JscreRegExp*>(internal->GetDataStartAddress());

@@ -488,12 +541,8 @@
    Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
    // The captures come in (start, end+1) pairs.
    for (int i = 0; i < 2 * (num_captures+1); i += 2) {
-    array->set(i,
-               Smi::FromInt(offsets_vector[i]),
-               SKIP_WRITE_BARRIER);
-    array->set(i+1,
-               Smi::FromInt(offsets_vector[i+1]),
-               SKIP_WRITE_BARRIER);
+    array->set(i, Smi::FromInt(offsets_vector[i]));
+    array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
    }
    return Factory::NewJSArrayWithElements(array);
  }
@@ -501,8 +550,8 @@

  class OffsetsVector {
   public:
-  inline OffsetsVector(int num_captures) {
-    offsets_vector_length_ = (num_captures + 1) * 3;
+  inline OffsetsVector(int num_registers) :
+    offsets_vector_length_(num_registers) {
      if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
        vector_ = NewArray<int>(offsets_vector_length_);
      } else {
@@ -531,7 +580,7 @@
   private:
    int* vector_;
    int offsets_vector_length_;
-  static const int kStaticOffsetsVectorSize = 30;
+  static const int kStaticOffsetsVectorSize = 50;
    static int static_offsets_vector_[kStaticOffsetsVectorSize];
  };

@@ -540,47 +589,127 @@
      OffsetsVector::kStaticOffsetsVectorSize];


-Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::Re2kExec(Handle<JSRegExp> regexp,
                                      Handle<String> subject,
                                      Handle<Object> index) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K);
+  ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined());
+
+  // Prepare space for the return values.
+  int number_of_registers = Re2kNumberOfRegisters(regexp);
+  OffsetsVector offsets(number_of_registers);
+
+  int num_captures = Re2kNumberOfCaptures(regexp);
+
+  int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
+
+  Handle<String> subject16 = CachedStringToTwoByte(subject);
+
+  Handle<Object> result(Re2kExecOnce(regexp,
+                                     num_captures,
+                                     subject,
+                                     previous_index,
+                                     subject16->GetTwoByteData(),
+                                     offsets.vector(),
+                                     offsets.length()));
+  return result;
+}
+
+
+Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
+                                     Handle<String> subject,
+                                     Handle<Object> index) {
    ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
    if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
-    Handle<Object> compile_result = JsreCompile(regexp);
+    Handle<Object> compile_result = JscreCompile(regexp);
      if (compile_result->IsException()) return compile_result;
    }
    ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());

-  // Prepare space for the return values.
-  int num_captures = JsreCapture(regexp);
+  int num_captures = JscreNumberOfCaptures(regexp);

-  OffsetsVector offsets(num_captures);
+  OffsetsVector offsets((num_captures + 1) * 3);

    int previous_index = static_cast<int>(DoubleToInteger(index->Number()));

    Handle<String> subject16 = CachedStringToTwoByte(subject);

-  Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
-                                     previous_index,
-                                     subject16->GetTwoByteData(),
-                                     offsets.vector(), offsets.length()));
+  Handle<Object> result(JscreExecOnce(regexp,
+                                      num_captures,
+                                      subject,
+                                      previous_index,
+                                      subject16->GetTwoByteData(),
+                                      offsets.vector(),
+                                      offsets.length()));

    return result;
  }


-Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::Re2kExecGlobal(Handle<JSRegExp> regexp,
                                            Handle<String> subject) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K);
+  ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined());
+
+  // Prepare space for the return values.
+  int number_of_registers = Re2kNumberOfRegisters(regexp);
+  OffsetsVector offsets(number_of_registers);
+
+  int previous_index = 0;
+
+  Handle<JSArray> result = Factory::NewJSArray(0);
+  int i = 0;
+  Handle<Object> matches;
+
+  Handle<String> subject16 = CachedStringToTwoByte(subject);
+
+  do {
+    if (previous_index > subject->length() || previous_index < 0) {
+      // Per ECMA-262 15.10.6.2, if the previous index is greater than the
+      // string length, there is no match.
+      matches = Factory::null_value();
+    } else {
+      matches = Re2kExecOnce(regexp,
+                             Re2kNumberOfCaptures(regexp),
+                             subject,
+                             previous_index,
+                             subject16->GetTwoByteData(),
+                             offsets.vector(),
+                             offsets.length());
+
+      if (matches->IsJSArray()) {
+        SetElement(result, i, matches);
+        i++;
+        previous_index = offsets.vector()[1];
+        if (offsets.vector()[0] == offsets.vector()[1]) {
+          previous_index++;
+        }
+      }
+    }
+  } while (matches->IsJSArray());
+
+  // If we exited the loop with an exception, throw it.
+  if (matches->IsNull()) {  // Exited loop normally.
+    return result;
+  } else {  // Exited loop with the exception in matches.
+    return matches;
+  }
+}
+
+
+Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
+                                           Handle<String> subject) {
    ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
    if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
-    Handle<Object> compile_result = JsreCompile(regexp);
+    Handle<Object> compile_result = JscreCompile(regexp);
      if (compile_result->IsException()) return compile_result;
    }
    ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());

    // Prepare space for the return values.
-  int num_captures = JsreCapture(regexp);
+  int num_captures = JscreNumberOfCaptures(regexp);

-  OffsetsVector offsets(num_captures);
+  OffsetsVector offsets((num_captures + 1) * 3);

    int previous_index = 0;

@@ -596,9 +725,13 @@
        // string length, there is no match.
        matches = Factory::null_value();
      } else {
-      matches = JsreExecOnce(regexp, num_captures, subject, previous_index,
-                             subject16->GetTwoByteData(),
-                             offsets.vector(), offsets.length());
+      matches = JscreExecOnce(regexp,
+                              num_captures,
+                              subject,
+                              previous_index,
+                              subject16->GetTwoByteData(),
+                              offsets.vector(),
+                              offsets.length());

        if (matches->IsJSArray()) {
          SetElement(result, i, matches);
@@ -620,15 +753,34 @@
  }


-int RegExpImpl::JsreCapture(Handle<JSRegExp> re) {
+int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
   FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
-  return Smi::cast(value->get(CAPTURE_INDEX))->value();
+  return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->
+      value();
  }


-ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) {
+ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
   FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
-  return ByteArray::cast(value->get(INTERNAL_INDEX));
+  return ByteArray::cast(value->get(kJscreInternalIndex));
+}
+
+
+int RegExpImpl::Re2kNumberOfCaptures(Handle<JSRegExp> re) {
+  FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
+  return Smi::cast(value->get(kRe2kNumberOfCapturesIndex))->value();
+}
+
+
+int RegExpImpl::Re2kNumberOfRegisters(Handle<JSRegExp> re) {
+  FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
+  return Smi::cast(value->get(kRe2kNumberOfRegistersIndex))->value();
+}
+
+
+Handle<ByteArray> RegExpImpl::Re2kCode(Handle<JSRegExp> re) {
+  FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
+  return Handle<ByteArray>(ByteArray::cast(value->get(kRe2kCodeIndex)));
  }


@@ -648,14 +800,12 @@
    int AllocateRegister() { return next_register_++; }

    Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
-                              RegExpNode* start);
+                              RegExpNode* start,
+                              int capture_count,
+                              bool case_independent);

    inline void AddWork(RegExpNode* node) { work_list_->Add(node); }

-  static const int kImplementationOffset = 0;
-  static const int kNumberOfRegistersOffset = 0;
-  static const int kCodeOffset = 1;
-
    RegExpMacroAssembler* macro_assembler() {
      return macro_assembler_;
    }
@@ -666,34 +816,52 @@
  };


+// Attempts to compile the regexp using a Regexp2000 code generator.  Returns
+// a fixed array or a null handle depending on whether it succeeded.
  Handle<FixedArray> RegExpCompiler::Assemble(
      RegExpMacroAssembler* macro_assembler,
-    RegExpNode* start) {
+    RegExpNode* start,
+    int capture_count,
+    bool case_independent) {
+  if (case_independent) return Handle<FixedArray>::null();
    macro_assembler_ = macro_assembler;
    List <RegExpNode*> work_list(0);
    work_list_ = &work_list;
-  start->GoTo(this);
+  Label fail;
+  macro_assembler->PushBacktrack(&fail);
+  if (!start->GoTo(this)) {
+    fail.Unuse();
+    return Handle<FixedArray>::null();
+  }
    while (!work_list.is_empty()) {
-    work_list.RemoveLast()->Emit(this);
+    if (!work_list.RemoveLast()->Emit(this)) {
+      fail.Unuse();
+      return Handle<FixedArray>::null();
+    }
    }
-  Handle<FixedArray> array = Factory::NewFixedArray(3);
-  array->set(kImplementationOffset,
-             Smi::FromInt(macro_assembler->Implementation()),
-             SKIP_WRITE_BARRIER);
-  array->set(kNumberOfRegistersOffset,
-             Smi::FromInt(next_register_),
-             SKIP_WRITE_BARRIER);
+  macro_assembler->Bind(&fail);
+  macro_assembler->Fail();
+  Handle<FixedArray> array =
+      Factory::NewFixedArray(RegExpImpl::kRe2kDataLength);
+  array->set(RegExpImpl::kRe2kImplementationIndex,
+             Smi::FromInt(macro_assembler->Implementation()));
+  array->set(RegExpImpl::kRe2kNumberOfRegistersIndex,
+             Smi::FromInt(next_register_));
+  array->set(RegExpImpl::kRe2kNumberOfCapturesIndex,
+             Smi::FromInt(capture_count));
    Handle<Object> code = macro_assembler->GetCode();
+  array->set(RegExpImpl::kRe2kCodeIndex, *code);
    work_list_ = NULL;
    return array;
  }


-void RegExpNode::GoTo(RegExpCompiler* compiler) {
+bool RegExpNode::GoTo(RegExpCompiler* compiler) {
    if (label.is_bound()) {
      compiler->macro_assembler()->GoTo(&label);
+    return true;
    } else {
-    Emit(compiler);
+    return Emit(compiler);
    }
  }

@@ -707,6 +875,19 @@
  EndNode EndNode::kBacktrack(BACKTRACK);


+bool EndNode::Emit(RegExpCompiler* compiler) {
+  switch (action_) {
+    case ACCEPT:
+      compiler->macro_assembler()->Succeed();
+      return true;
+    case BACKTRACK:
+      compiler->macro_assembler()->Backtrack();
+      return true;
+  }
+  return false;
+}
+
+
  void GuardedAlternative::AddGuard(Guard* guard) {
    if (guards_ == NULL)
      guards_ = new ZoneList<Guard*>(1);
@@ -782,13 +963,13 @@
  // Emit code.


-void ChoiceNode::Emit(RegExpCompiler* compiler) {
+bool ChoiceNode::Emit(RegExpCompiler* compiler) {
    // TODO(erikcorry): Implement this.
-  UNREACHABLE();
+  return false;
  }


-void ActionNode::Emit(RegExpCompiler* compiler) {
+bool ActionNode::Emit(RegExpCompiler* compiler) {
    RegExpMacroAssembler* macro = compiler->macro_assembler();
    switch (type_) {
      case STORE_REGISTER:
@@ -806,17 +987,19 @@
        break;
      case BEGIN_SUBMATCH:
        // TODO(erikcorry): Implement this.
-      UNREACHABLE();
-      break;
+      return false;
      case ESCAPE_SUBMATCH:
        // TODO(erikcorry): Implement this.
-      UNREACHABLE();
-      break;
+      return false;
      case END_SUBMATCH:
        // TODO(erikcorry): Implement this.
+      return false;
+    default:
        UNREACHABLE();
-      break;
+      return false;
    }
+  compiler->AddWork(on_success());
+  return true;
  }


@@ -1610,7 +1793,9 @@
  }


-RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
+Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
+                                         RegExpNode** node_return,
+                                         bool ignore_case) {
    RegExpCompiler compiler(input->capture_count);
    // Wrap the body of the regexp in capture #0.
    RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
@@ -1630,9 +1815,13 @@
                                                &compiler,
                                                captured_body,
                                                EndNode::GetBacktrack());
+  if (node_return != NULL) *node_return = node;
    Analysis analysis(&compiler);
    analysis.Analyze(node);
-  return node;
+  byte codes[10240];
+  Re2kAssembler assembler(Vector<byte>(codes, 1024));
+  RegExpMacroAssemblerRe2k macro_assembler(&assembler);
+  return compiler.Assemble(&macro_assembler, node, input->capture_count, ignore_case);
  }

  RegExpMacroAssembler::RegExpMacroAssembler() {

Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h     (original)
+++ branches/experimental/regexp2000/src/jsregexp.h     Fri Nov 14 03:26:08 2008
@@ -63,13 +63,21 @@

    // Stores an uncompiled RegExp pattern in the JSRegExp object.
    // It will be compiled by JSCRE when first executed.
-  static Handle<Object> JsrePrepare(Handle<JSRegExp> re,
+  static Handle<Object> JscrePrepare(Handle<JSRegExp> re,
+                                     Handle<String> pattern,
+                                     JSRegExp::Flags flags);
+
+  // Stores a compiled RegExp pattern in the JSRegExp object.
+  // The pattern is compiled by Regexp2000.
+  static Handle<Object> Re2kPrepare(Handle<JSRegExp> re,
                                      Handle<String> pattern,
-                                    JSRegExp::Flags flags);
+                                    JSRegExp::Flags flags,
+                                    Handle<FixedArray> re2k_data);
+

    // Compile the pattern using JSCRE and store the result in the
    // JSRegExp object.
-  static Handle<Object> JsreCompile(Handle<JSRegExp> re);
+  static Handle<Object> JscreCompile(Handle<JSRegExp> re);

    static Handle<Object> AtomCompile(Handle<JSRegExp> re,
                                      Handle<String> pattern,
@@ -82,16 +90,24 @@
    static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
                                         Handle<String> subject);

-  static Handle<Object> JsreCompile(Handle<JSRegExp> re,
-                                    Handle<String> pattern,
-                                    JSRegExp::Flags flags);
+  static Handle<Object> JscreCompile(Handle<JSRegExp> re,
+                                     Handle<String> pattern,
+                                     JSRegExp::Flags flags);

    // Execute a compiled JSCRE pattern.
-  static Handle<Object> JsreExec(Handle<JSRegExp> regexp,
+  static Handle<Object> JscreExec(Handle<JSRegExp> regexp,
+                                  Handle<String> subject,
+                                  Handle<Object> index);
+
+  // Execute a Regexp2000 bytecode pattern.
+  static Handle<Object> Re2kExec(Handle<JSRegExp> regexp,
                                   Handle<String> subject,
                                   Handle<Object> index);

-  static Handle<Object> JsreExecGlobal(Handle<JSRegExp> regexp,
+  static Handle<Object> JscreExecGlobal(Handle<JSRegExp> regexp,
+                                        Handle<String> subject);
+
+  static Handle<Object> Re2kExecGlobal(Handle<JSRegExp> regexp,
                                         Handle<String> subject);

    static void NewSpaceCollectionPrologue();
@@ -103,16 +119,37 @@
    static Handle<String> StringToTwoByte(Handle<String> pattern);
    static Handle<String> CachedStringToTwoByte(Handle<String> pattern);

+  static const int kRe2kImplementationIndex = 0;
+  static const int kRe2kNumberOfCapturesIndex = 1;
+  static const int kRe2kNumberOfRegistersIndex = 2;
+  static const int kRe2kCodeIndex = 3;
+  static const int kRe2kDataLength = 4;
+
+  static const int kJscreNumberOfCapturesIndex = 0;
+  static const int kJscreInternalIndex = 1;
+  static const int kJscreDataLength = 2;
+
   private:
    static String* last_ascii_string_;
    static String* two_byte_cached_string_;

-  // Returns the caputure from the re.
-  static int JsreCapture(Handle<JSRegExp> re);
-  static ByteArray* JsreInternal(Handle<JSRegExp> re);
+  static int JscreNumberOfCaptures(Handle<JSRegExp> re);
+  static ByteArray* JscreInternal(Handle<JSRegExp> re);
+
+  static int Re2kNumberOfCaptures(Handle<JSRegExp> re);
+  static int Re2kNumberOfRegisters(Handle<JSRegExp> re);
+  static Handle<ByteArray> Re2kCode(Handle<JSRegExp> re);

    // Call jsRegExpExecute once
-  static Handle<Object> JsreExecOnce(Handle<JSRegExp> regexp,
+  static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
+                                      int num_captures,
+                                      Handle<String> subject,
+                                      int previous_index,
+                                      const uc16* utf8_subject,
+                                      int* ovector,
+                                      int ovector_length);
+
+  static Handle<Object> Re2kExecOnce(Handle<JSRegExp> regexp,
                                       int num_captures,
                                       Handle<String> subject,
                                       int previous_index,
@@ -122,8 +159,10 @@

   // Set the subject cache.  The previous string buffer is not deleted, so the
    // caller should ensure that it doesn't leak.
-  static void SetSubjectCache(String* subject, char* utf8_subject,
-                              int uft8_length, int character_position,
+  static void SetSubjectCache(String* subject,
+                              char* utf8_subject,
+                              int uft8_length,
+                              int character_position,
                                int utf8_position);

   // A one element cache of the last utf8_subject string and its length.  The
@@ -362,9 +401,13 @@
    virtual ~RegExpNode() { }
    virtual void Accept(NodeVisitor* visitor) = 0;
   // Generates a goto to this node or actually generates the code at this point.
-  void GoTo(RegExpCompiler* compiler);
+  // Until the implementation is complete we will return true for success and
+  // false for failure.
+  bool GoTo(RegExpCompiler* compiler);
    void EmitAddress(RegExpCompiler* compiler);
-  virtual void Emit(RegExpCompiler* compiler) = 0;
+  // Until the implementation is complete we will return true for success and
+  // false for failure.
+  virtual bool Emit(RegExpCompiler* compiler) = 0;
   private:
    Label label;
  };
@@ -375,7 +418,7 @@
    explicit SeqRegExpNode(RegExpNode* on_success)
      : on_success_(on_success) { }
    RegExpNode* on_success() { return on_success_; }
-  virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+  virtual bool Emit(RegExpCompiler* compiler) { return false; }
   private:
    RegExpNode* on_success_;
  };
@@ -400,7 +443,7 @@
    static ActionNode* EscapeSubmatch(RegExpNode* on_success);
    static ActionNode* EndSubmatch(RegExpNode* on_success);
    virtual void Accept(NodeVisitor* visitor);
-  virtual void Emit(RegExpCompiler* compiler);
+  virtual bool Emit(RegExpCompiler* compiler);
   private:
    union {
      struct {
@@ -433,7 +476,7 @@
    virtual void Accept(NodeVisitor* visitor);
    Vector<const uc16> data() { return data_; }
    RegExpNode* on_failure() { return on_failure_; }
-  virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+  virtual bool Emit(RegExpCompiler* compiler) { return false; }
   private:
    RegExpNode* on_failure_;
    Vector<const uc16> data_;
@@ -454,7 +497,7 @@
    RegExpNode* on_failure() { return on_failure_; }
    int start_register() { return start_reg_; }
    int end_register() { return end_reg_; }
-  virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+  virtual bool Emit(RegExpCompiler* compiler) { return false; }
   private:
    RegExpNode* on_failure_;
    int start_reg_;
@@ -476,7 +519,7 @@
    ZoneList<CharacterRange>* ranges() { return ranges_; }
    bool is_negated() { return is_negated_; }
    RegExpNode* on_failure() { return on_failure_; }
-  virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+  virtual bool Emit(RegExpCompiler* compiler) { return false; }
    static void AddInverseToTable(ZoneList<CharacterRange>* ranges,
                                  DispatchTable* table,
                                  int index);
@@ -493,7 +536,7 @@
    virtual void Accept(NodeVisitor* visitor);
    static EndNode* GetAccept() { return &kAccept; }
    static EndNode* GetBacktrack() { return &kBacktrack; }
-  virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+  virtual bool Emit(RegExpCompiler* compiler);
   private:
    explicit EndNode(Action action) : action_(action) { }
    Action action_;
@@ -542,7 +585,7 @@
    ZoneList<GuardedAlternative>* choices() { return choices_; }
    DispatchTable* table() { return &table_; }
    RegExpNode* on_failure() { return on_failure_; }
-  virtual void Emit(RegExpCompiler* compiler);
+  virtual bool Emit(RegExpCompiler* compiler);
    bool visited() { return visited_; }
    void set_visited(bool value) { visited_ = value; }
   private:
@@ -563,7 +606,9 @@

  class RegExpEngine: public AllStatic {
   public:
-  static RegExpNode* Compile(RegExpParseResult* input);
+  static Handle<FixedArray> Compile(RegExpParseResult* input,
+                                    RegExpNode** node_return,
+                                    bool ignore_case);
    static void DotPrint(const char* label, RegExpNode* node);
  };


Modified: branches/experimental/regexp2000/src/objects-debug.cc
==============================================================================
--- branches/experimental/regexp2000/src/objects-debug.cc       (original)
+++ branches/experimental/regexp2000/src/objects-debug.cc       Fri Nov 14 03:26:08 2008
@@ -674,6 +674,12 @@
        ASSERT(jscre_data->IsFixedArray() || jscre_data->IsUndefined());
        break;
      }
+    case JSRegExp::RE2K: {
+      FixedArray* arr = FixedArray::cast(data());
+      Object* jscre_data = arr->get(JSRegExp::kJscreDataIndex);
+      ASSERT(jscre_data->IsFixedArray());
+      break;
+    }
      default:
        ASSERT_EQ(JSRegExp::NOT_COMPILED, TypeTag());
        ASSERT(data()->IsUndefined());

Modified: branches/experimental/regexp2000/src/objects-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/objects-inl.h  (original)
+++ branches/experimental/regexp2000/src/objects-inl.h  Fri Nov 14 03:26:08 2008
@@ -1142,6 +1142,13 @@
  }


+void FixedArray::set(int index, Smi* value) {
+  ASSERT(reinterpret_cast<Object*>(value)->IsSmi());
+  int offset = kHeaderSize + index * kPointerSize;
+  WRITE_FIELD(this, offset, value);
+}
+
+
  void FixedArray::set(int index, Object* value) {
    ASSERT(index >= 0 && index < this->length());
    int offset = kHeaderSize + index * kPointerSize;

Modified: branches/experimental/regexp2000/src/objects.h
==============================================================================
--- branches/experimental/regexp2000/src/objects.h      (original)
+++ branches/experimental/regexp2000/src/objects.h      Fri Nov 14 03:26:08 2008
@@ -1498,9 +1498,12 @@

    // Setter and getter for elements.
    inline Object* get(int index);
+  // Setter that uses write barrier.
    inline void set(int index, Object* value);

-  // Setter with barrier mode.
+  // Setter that doesn't need write barrier).
+  inline void set(int index, Smi* value);
+  // Setter with explicit barrier mode.
    inline void set(int index, Object* value, WriteBarrierMode mode);

    // Setters for frequently used oddballs located in old space.
@@ -2916,7 +2919,9 @@
   // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
    // JSCRE: A complex RegExp for JSCRE
    // ATOM: A simple string to match against using an indexOf operation.
-  enum Type { NOT_COMPILED, JSCRE, ATOM };
+  // RE2K: Compiled with RegExp2000.
+  // RE2K_NATIVE: Compiled to native code with RegExp2000.
+  enum Type { NOT_COMPILED, JSCRE, ATOM, RE2K, RE2K_NATIVE };
    enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };

    class Flags {
@@ -2951,10 +2956,11 @@
    static const int kTagIndex = 0;
    static const int kSourceIndex = kTagIndex + 1;
    static const int kFlagsIndex = kSourceIndex + 1;
-  // These two are the same since the same entry is shared for
+  // These three are the same since the same entry is shared for
    // different purposes in different types of regexps.
    static const int kAtomPatternIndex = kFlagsIndex + 1;
    static const int kJscreDataIndex = kFlagsIndex + 1;
+  static const int kRe2kDataIndex = kFlagsIndex + 1;
    static const int kDataSize = kAtomPatternIndex + 1;
  };


Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Fri Nov 14 03:26:08 2008
@@ -338,7 +338,8 @@
    RegExpParseResult result;
    if (!v8::internal::ParseRegExp(&buffer, &result))
      return;
-  RegExpNode* node = RegExpEngine::Compile(&result);
+  RegExpNode* node = NULL;
+  RegExpEngine::Compile(&result, &node, false);
    USE(node);
  #ifdef DEBUG
    if (dot_output) {

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to