Author: [EMAIL PROTECTED]
Date: Mon Dec  8 04:43:01 2008
New Revision: 937

Modified:
    branches/bleeding_edge/src/jsregexp.cc
    branches/bleeding_edge/src/jsregexp.h
    branches/bleeding_edge/src/objects.h
    branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
    branches/bleeding_edge/test/cctest/test-regexp.cc

Log:
Irregexp is specialized on subject character type.


Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc      (original)
+++ branches/bleeding_edge/src/jsregexp.cc      Mon Dec  8 04:43:01 2008
@@ -201,6 +201,50 @@
  }


+// Generic RegExp methods. Dispatches to implementation specific methods.
+
+
+class OffsetsVector {
+ public:
+  inline OffsetsVector(int num_registers)
+      : offsets_vector_length_(num_registers) {
+    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
+      vector_ = NewArray<int>(offsets_vector_length_);
+    } else {
+      vector_ = static_offsets_vector_;
+    }
+  }
+
+
+  inline ~OffsetsVector() {
+    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
+      DeleteArray(vector_);
+      vector_ = NULL;
+    }
+  }
+
+
+  inline int* vector() {
+    return vector_;
+  }
+
+
+  inline int length() {
+    return offsets_vector_length_;
+  }
+
+ private:
+  int* vector_;
+  int offsets_vector_length_;
+  static const int kStaticOffsetsVectorSize = 50;
+  static int static_offsets_vector_[kStaticOffsetsVectorSize];
+};
+
+
+int OffsetsVector::static_offsets_vector_[
+    OffsetsVector::kStaticOffsetsVectorSize];
+
+
  Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
                                     Handle<String> pattern,
                                     Handle<String> flag_str) {
@@ -224,7 +268,7 @@
                             pattern,
                             parse_result.error,
                             "malformed_regexp");
-      return Handle<Object>();
+      return Handle<Object>::null();
      }
      RegExpAtom* atom = parse_result.tree->AsAtom();
      if (atom != NULL && !flags.is_ignore_case()) {
@@ -237,20 +281,10 @@
          result = AtomCompile(re, pattern, flags, pattern);
        }
      } else {
-      RegExpNode* node = NULL;
-      Handle<FixedArray> irregexp_data =
-          RegExpEngine::Compile(&parse_result,
-                                &node,
-                                flags.is_ignore_case(),
-                                flags.is_multiline(),
-                                pattern);
-      if (irregexp_data.is_null()) {
-        if (FLAG_disable_jscre) {
-          UNIMPLEMENTED();
-        }
-        result = JscrePrepare(re, pattern, flags);
+      if (FLAG_irregexp) {
+        result = IrregexpPrepare(re, pattern, flags);
        } else {
-        result = IrregexpPrepare(re, pattern, flags, irregexp_data);
+        result = JscrePrepare(re, pattern, flags);
        }
      }
      Object* data = re->data();
@@ -270,18 +304,30 @@
                                  Handle<String> subject,
                                  Handle<Object> index) {
    switch (regexp->TypeTag()) {
+    case JSRegExp::ATOM:
+      return AtomExec(regexp, subject, index);
+    case JSRegExp::IRREGEXP: {
+      Handle<Object> result = IrregexpExec(regexp, subject, index);
+      if (!result.is_null()) {
+        return result;
+      }
+      // We couldn't handle the regexp using Irregexp, so fall back
+      // on JSCRE. We rejoice at the thought of the day when this is
+      // no longer needed.
+      // Reset the JSRegExp to use JSCRE.
+      JscrePrepare(regexp,
+                   Handle<String>(regexp->Pattern()),
+                   regexp->GetFlags());
+      // Fall-through to JSCRE.
+    }
      case JSRegExp::JSCRE:
        if (FLAG_disable_jscre) {
          UNIMPLEMENTED();
        }
        return JscreExec(regexp, subject, index);
-    case JSRegExp::ATOM:
-      return AtomExec(regexp, subject, index);
-    case JSRegExp::IRREGEXP:
-      return IrregexpExec(regexp, subject, index);
      default:
        UNREACHABLE();
-      return Handle<Object>();
+      return Handle<Object>::null();
    }
  }

@@ -289,22 +335,37 @@
  Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
                                  Handle<String> subject) {
    switch (regexp->TypeTag()) {
+    case JSRegExp::ATOM:
+      return AtomExecGlobal(regexp, subject);
+    case JSRegExp::IRREGEXP: {
+      Handle<Object> result = IrregexpExecGlobal(regexp, subject);
+      if (!result.is_null()) {
+        return result;
+      }
+      // We couldn't handle the regexp using Irregexp, so fall back
+      // on JSCRE. We rejoice at the thought of the day when this is
+      // no longer needed.
+      // Reset the JSRegExp to use JSCRE.
+      JscrePrepare(regexp,
+                   Handle<String>(regexp->Pattern()),
+                   regexp->GetFlags());
+      // Fall-through to JSCRE.
+    }
      case JSRegExp::JSCRE:
        if (FLAG_disable_jscre) {
          UNIMPLEMENTED();
        }
        return JscreExecGlobal(regexp, subject);
-    case JSRegExp::ATOM:
-      return AtomExecGlobal(regexp, subject);
-    case JSRegExp::IRREGEXP:
-      return IrregexpExecGlobal(regexp, subject);
      default:
        UNREACHABLE();
-      return Handle<Object>();
+      return Handle<Object>::null();
    }
  }


+// RegExp Atom implementation: Simple string search using indexOf.
+
+
  Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
                                         Handle<String> pattern,
                                         JSRegExp::Flags flags,
@@ -366,6 +427,21 @@
  }


+// JSCRE implementation.
+
+
+int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
+  FixedArray* value =  
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
+  return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
+}
+
+
+ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
+  FixedArray* value =  
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
+  return ByteArray::cast(value->get(kJscreInternalIndex));
+}
+
+
  Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
                                         Handle<String> pattern,
                                         JSRegExp::Flags flags) {
@@ -375,20 +451,11 @@
  }


-Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
-                                          Handle<String> pattern,
-                                          JSRegExp::Flags flags,
-                                          Handle<FixedArray>  
irregexp_data) {
-  Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags,  
irregexp_data);
-  return re;
-}
-
-
-static inline Object* DoCompile(String* pattern,
-                                JSRegExp::Flags flags,
-                                unsigned* number_of_captures,
-                                const char** error_message,
-                                v8::jscre::JscreRegExp** code) {
+static inline Object* JscreDoCompile(String* pattern,
+                                     JSRegExp::Flags flags,
+                                     unsigned* number_of_captures,
+                                     const char** error_message,
+                                     v8::jscre::JscreRegExp** code) {
    v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
      ? v8::jscre::JSRegExpIgnoreCase
      : v8::jscre::JSRegExpDoNotIgnoreCase;
@@ -417,16 +484,16 @@
  }


-void CompileWithRetryAfterGC(Handle<String> pattern,
-                             JSRegExp::Flags flags,
-                             unsigned* number_of_captures,
-                             const char** error_message,
-                             v8::jscre::JscreRegExp** code) {
-  CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,
-                                    flags,
-                                    number_of_captures,
-                                    error_message,
-                                    code));
+static void JscreCompileWithRetryAfterGC(Handle<String> pattern,
+                                         JSRegExp::Flags flags,
+                                         unsigned* number_of_captures,
+                                         const char** error_message,
+                                         v8::jscre::JscreRegExp** code) {
+  CALL_HEAP_FUNCTION_VOID(JscreDoCompile(*pattern,
+                                         flags,
+                                         number_of_captures,
+                                         error_message,
+                                         code));
  }


@@ -445,11 +512,11 @@
    v8::jscre::JscreRegExp* code = NULL;
    FlattenString(pattern);

-  CompileWithRetryAfterGC(two_byte_pattern,
-                          flags,
-                          &number_of_captures,
-                          &error_message,
-                          &code);
+  JscreCompileWithRetryAfterGC(two_byte_pattern,
+                               flags,
+                               &number_of_captures,
+                               &error_message,
+                               &code);

    if (code == NULL) {
      // Throw an exception.
@@ -476,92 +543,31 @@
  }


-Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
-                                            int num_captures,
-                                            Handle<String>  
two_byte_subject,
-                                            int previous_index,
-                                            int* offsets_vector,
-                                            int offsets_vector_length) {
-#ifdef DEBUG
-  if (FLAG_trace_regexp_bytecodes) {
-    String* pattern = regexp->Pattern();
-    PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
-    PrintF("\n\nSubject string: '%s'\n\n",  
*(two_byte_subject->ToCString()));
-  }
-#endif
-  ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
-  ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
-  bool rc;
-
-  for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
-    offsets_vector[i] = -1;
+Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
+                                     Handle<String> subject,
+                                     Handle<Object> index) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
+  if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
+    Handle<Object> compile_result = JscreCompile(regexp);
+    if (compile_result.is_null()) return compile_result;
    }
+  ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());

-  LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));
-
-  FixedArray* irregexp =
-      FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));
-  int tag =  
Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
+  int num_captures = JscreNumberOfCaptures(regexp);

-  switch (tag) {
-    case RegExpMacroAssembler::kIA32Implementation: {
-#ifndef ARM
-      Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex));
-      Address start_addr =
-           
Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress();
-      int string_offset =
-          start_addr - reinterpret_cast<Address>(*two_byte_subject);
-      int start_offset = string_offset + previous_index * sizeof(uc16);
-      int end_offset =
-          string_offset + two_byte_subject->length() * sizeof(uc16);
-      rc = RegExpMacroAssemblerIA32::Execute(code,
-                                             two_byte_subject.location(),
-                                             start_offset,
-                                             end_offset,
-                                             offsets_vector,
-                                             previous_index == 0);
-      if (rc) {
-        // Capture values are relative to start_offset only.
-        for (int i = 0; i < offsets_vector_length; i++) {
-          if (offsets_vector[i] >= 0) {
-            offsets_vector[i] += previous_index;
-          }
-        }
-      }
-      break;
-#else
-      UNIMPLEMENTED();
-      rc = false;
-      break;
-#endif
-    }
-    case RegExpMacroAssembler::kBytecodeImplementation: {
-      Handle<ByteArray> byte_codes = IrregexpCode(regexp);
+  OffsetsVector offsets((num_captures + 1) * 3);

-      rc = IrregexpInterpreter::Match(byte_codes,
-                                      two_byte_subject,
-                                      offsets_vector,
-                                      previous_index);
-      break;
-    }
-    case RegExpMacroAssembler::kARMImplementation:
-    default:
-      UNREACHABLE();
-      rc = false;
-      break;
-  }
+  int previous_index = static_cast<int>(DoubleToInteger(index->Number()));

-  if (!rc) {
-    return Factory::null_value();
-  }
+  Handle<String> subject16 = CachedStringToTwoByte(subject);

-  Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
-  // The captures come in (start, end+1) pairs.
-  for (int i = 0; i < 2 * (num_captures+1); i += 2) {
-    array->set(i, Smi::FromInt(offsets_vector[i]));
-    array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
-  }
-  return Factory::NewJSArrayWithElements(array);
+  return JscreExecOnce(regexp,
+                       num_captures,
+                       subject,
+                       previous_index,
+                       subject16->GetTwoByteData(),
+                       offsets.vector(),
+                       offsets.length());
  }


@@ -617,76 +623,8 @@
  }


-class OffsetsVector {
- public:
-  inline OffsetsVector(int num_registers)
-      : offsets_vector_length_(num_registers) {
-    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
-      vector_ = NewArray<int>(offsets_vector_length_);
-    } else {
-      vector_ = static_offsets_vector_;
-    }
-  }
-
-
-  inline ~OffsetsVector() {
-    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
-      DeleteArray(vector_);
-      vector_ = NULL;
-    }
-  }
-
-
-  inline int* vector() {
-    return vector_;
-  }
-
-
-  inline int length() {
-    return offsets_vector_length_;
-  }
-
- private:
-  int* vector_;
-  int offsets_vector_length_;
-  static const int kStaticOffsetsVectorSize = 50;
-  static int static_offsets_vector_[kStaticOffsetsVectorSize];
-};
-
-
-int OffsetsVector::static_offsets_vector_[
-    OffsetsVector::kStaticOffsetsVectorSize];
-
-
-Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
-                                        Handle<String> subject,
-                                        Handle<Object> index) {
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
-  ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
-
-  // Prepare space for the return values.
-  int number_of_registers = IrregexpNumberOfRegisters(regexp);
-  OffsetsVector offsets(number_of_registers);
-
-  int num_captures = IrregexpNumberOfCaptures(regexp);
-
-  int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
-
-  Handle<String> subject16 = CachedStringToTwoByte(subject);
-
-  Handle<Object> result(IrregexpExecOnce(regexp,
-                                         num_captures,
-                                         subject16,
-                                         previous_index,
-                                         offsets.vector(),
-                                         offsets.length()));
-  return result;
-}
-
-
-Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
-                                     Handle<String> subject,
-                                     Handle<Object> index) {
+Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
+                                           Handle<String> subject) {
    ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
    if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
      Handle<Object> compile_result = JscreCompile(regexp);
@@ -694,35 +632,11 @@
    }
    ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());

+  // Prepare space for the return values.
    int num_captures = JscreNumberOfCaptures(regexp);

    OffsetsVector offsets((num_captures + 1) * 3);

-  int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
-
-  Handle<String> subject16 = CachedStringToTwoByte(subject);
-
-  Handle<Object> result(JscreExecOnce(regexp,
-                                      num_captures,
-                                      subject,
-                                      previous_index,
-                                      subject16->GetTwoByteData(),
-                                      offsets.vector(),
-                                      offsets.length()));
-
-  return result;
-}
-
-
-Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
-                                              Handle<String> subject) {
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
-  ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
-
-  // Prepare space for the return values.
-  int number_of_registers = IrregexpNumberOfRegisters(regexp);
-  OffsetsVector offsets(number_of_registers);
-
    int previous_index = 0;

    Handle<JSArray> result = Factory::NewJSArray(0);
@@ -737,12 +651,13 @@
        // string length, there is no match.
        matches = Factory::null_value();
      } else {
-      matches = IrregexpExecOnce(regexp,
-                                 IrregexpNumberOfCaptures(regexp),
-                                 subject16,
-                                 previous_index,
-                                 offsets.vector(),
-                                 offsets.length());
+      matches = JscreExecOnce(regexp,
+                              num_captures,
+                              subject,
+                              previous_index,
+                              subject16->GetTwoByteData(),
+                              offsets.vector(),
+                              offsets.length());

        if (matches->IsJSArray()) {
          SetElement(result, i, matches);
@@ -766,19 +681,146 @@
  }


-Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
-                                           Handle<String> subject) {
-  ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
-  if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
-    Handle<Object> compile_result = JscreCompile(regexp);
-    if (compile_result.is_null()) return compile_result;
+// Irregexp implementation.
+
+
+static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
+                                              bool is_ascii) {
+  ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
+  Handle<FixedArray> alternatives(
+      FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
+  ASSERT_EQ(2, alternatives->length());
+
+  int index = is_ascii ? 0 : 1;
+  Object* entry = alternatives->get(index);
+  if (!entry->IsNull()) {
+    return Handle<FixedArray>(FixedArray::cast(entry));
+  }
+
+  // Compile the RegExp.
+  ZoneScope zone_scope(DELETE_ON_EXIT);
+
+  JSRegExp::Flags flags = re->GetFlags();
+
+  Handle<String> pattern(re->Pattern());
+  StringShape shape(*pattern);
+  if (!pattern->IsFlat(shape)) {
+    pattern->Flatten(shape);
+  }
+
+  RegExpParseResult parse_result;
+  FlatStringReader reader(pattern);
+  if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
+    // Throw an exception if we fail to parse the pattern.
+    // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
+    ThrowRegExpException(re,
+                         pattern,
+                         parse_result.error,
+                         "malformed_regexp");
+    return Handle<FixedArray>::null();
+  }
+  Handle<FixedArray> compiled_entry =
+      RegExpEngine::Compile(&parse_result,
+                            NULL,
+                            flags.is_ignore_case(),
+                            flags.is_multiline(),
+                            pattern,
+                            is_ascii);
+  if (!compiled_entry.is_null()) {
+    alternatives->set(index, *compiled_entry);
+  }
+  return compiled_entry;
+}
+
+
+int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) {
+  return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value();
+}
+
+
+int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) {
+  return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value();
+}
+
+
+Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) {
+  ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
+      == RegExpMacroAssembler::kBytecodeImplementation);
+  return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
+}
+
+
+Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) {
+  ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
+      != RegExpMacroAssembler::kBytecodeImplementation);
+  return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
+}
+
+
+Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
+                                          Handle<String> pattern,
+                                          JSRegExp::Flags flags) {
+  // Make space for ASCII and UC16 versions.
+  Handle<FixedArray> alternatives = Factory::NewFixedArray(2);
+  alternatives->set_null(0);
+  alternatives->set_null(1);
+  Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags,  
alternatives);
+  return re;
+}
+
+
+Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
+                                        Handle<String> subject,
+                                        Handle<Object> index) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+  ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
+
+  bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
+  Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
+  if (irregexp.is_null()) {
+    // We can't handle the RegExp with Irregexp.
+    return Handle<Object>::null();
    }
-  ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());

    // Prepare space for the return values.
-  int num_captures = JscreNumberOfCaptures(regexp);
+  int number_of_registers = IrregexpNumberOfRegisters(irregexp);
+  OffsetsVector offsets(number_of_registers);
+
+  int num_captures = IrregexpNumberOfCaptures(irregexp);
+
+  int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
+
+#ifdef DEBUG
+  if (FLAG_trace_regexp_bytecodes) {
+    String* pattern = regexp->Pattern();
+    PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
+    PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
+  }
+#endif
+  LOG(RegExpExecEvent(regexp, previous_index, subject));
+  return IrregexpExecOnce(irregexp,
+                          num_captures,
+                          subject,
+                          previous_index,
+                          offsets.vector(),
+                          offsets.length());
+}

-  OffsetsVector offsets((num_captures + 1) * 3);
+
+Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
+                                              Handle<String> subject) {
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+
+  StringShape shape(*subject);
+  bool is_ascii = shape.IsAsciiRepresentation();
+  Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
+  if (irregexp.is_null()) {
+    return Handle<Object>::null();
+  }
+
+  // Prepare space for the return values.
+  int number_of_registers = IrregexpNumberOfRegisters(irregexp);
+  OffsetsVector offsets(number_of_registers);

    int previous_index = 0;

@@ -786,7 +828,9 @@
    int i = 0;
    Handle<Object> matches;

-  Handle<String> subject16 = CachedStringToTwoByte(subject);
+  if (!subject->IsFlat(shape)) {
+    subject->Flatten(shape);
+  }

    do {
      if (previous_index > subject->length() || previous_index < 0) {
@@ -794,13 +838,20 @@
        // string length, there is no match.
        matches = Factory::null_value();
      } else {
-      matches = JscreExecOnce(regexp,
-                              num_captures,
-                              subject,
-                              previous_index,
-                              subject16->GetTwoByteData(),
-                              offsets.vector(),
-                              offsets.length());
+#ifdef DEBUG
+      if (FLAG_trace_regexp_bytecodes) {
+        String* pattern = regexp->Pattern();
+        PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
+        PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
+      }
+#endif
+      LOG(RegExpExecEvent(regexp, previous_index, subject));
+      matches = IrregexpExecOnce(irregexp,
+                                 IrregexpNumberOfCaptures(irregexp),
+                                 subject,
+                                 previous_index,
+                                 offsets.vector(),
+                                 offsets.length());

        if (matches->IsJSArray()) {
          SetElement(result, i, matches);
@@ -824,36 +875,120 @@
  }


-int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
-  FixedArray* value =  
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
-  return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
-}
+Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
+                                            int num_captures,
+                                            Handle<String> subject,
+                                            int previous_index,
+                                            int* offsets_vector,
+                                            int offsets_vector_length) {
+  bool rc;

+  int tag =  
Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();

-ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
-  FixedArray* value =  
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
-  return ByteArray::cast(value->get(kJscreInternalIndex));
-}
+  switch (tag) {
+    case RegExpMacroAssembler::kIA32Implementation: {
+#ifndef ARM
+      if (!subject->IsFlat(StringShape(*subject))) {
+        FlattenString(subject);
+      }
+      Handle<Code> code = IrregexpNativeCode(irregexp);

+      StringShape shape(*subject);

-int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) {
-  FixedArray* value =
-      FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
-  return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value();
-}
+      // Character offsets into string.
+      int start_offset = previous_index;
+      int end_offset = subject->length(shape);
+
+      if (shape.IsCons()) {
+        subject = Handle<String>(ConsString::cast(*subject)->first());
+      } else if (shape.IsSliced()) {
+        SlicedString* slice = SlicedString::cast(*subject);
+        start_offset += slice->start();
+        end_offset += slice->start();
+        subject = Handle<String>(slice->buffer());
+      }
+
+      // String is now either Sequential or External
+      StringShape flatshape(*subject);
+      bool is_ascii = flatshape.IsAsciiRepresentation();
+      int char_size = is_ascii ? sizeof(char) : sizeof(uc16);  // NOLINT
+
+      if (flatshape.IsExternal()) {
+        const byte* address;
+        if (is_ascii) {
+          ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
+          address = reinterpret_cast<const byte*>(ext->resource()->data());
+        } else {
+          ExternalTwoByteString* ext =  
ExternalTwoByteString::cast(*subject);
+          address = reinterpret_cast<const byte*>(ext->resource()->data());
+        }
+        rc = RegExpMacroAssemblerIA32::Execute(
+            *code,
+            &address,
+            start_offset * char_size,
+            end_offset * char_size,
+            offsets_vector,
+            previous_index == 0);
+      } else {  // Sequential string
+        int byte_offset =
+            is_ascii ? SeqAsciiString::kHeaderSize - kHeapObjectTag:
+                       SeqTwoByteString::kHeaderSize - kHeapObjectTag;
+        rc = RegExpMacroAssemblerIA32::Execute(
+            *code,
+            subject.location(),
+            byte_offset + start_offset * char_size,
+            byte_offset + end_offset * char_size,
+            offsets_vector,
+            previous_index == 0);
+      }

+      if (rc) {
+        // Capture values are relative to start_offset only.
+        for (int i = 0; i < offsets_vector_length; i++) {
+          if (offsets_vector[i] >= 0) {
+            offsets_vector[i] += previous_index;
+          }
+        }
+      }
+      break;
+#else
+      UNIMPLEMENTED();
+      rc = false;
+      break;
+#endif
+    }
+    case RegExpMacroAssembler::kBytecodeImplementation: {
+      for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
+        offsets_vector[i] = -1;
+      }
+      Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);

-int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) {
-  FixedArray* value =
-      FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
-  return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value();
-}
+      Handle<String> two_byte_subject = CachedStringToTwoByte(subject);

+      rc = IrregexpInterpreter::Match(byte_codes,
+                                      two_byte_subject,
+                                      offsets_vector,
+                                      previous_index);
+      break;
+    }
+    case RegExpMacroAssembler::kARMImplementation:
+    default:
+      UNREACHABLE();
+      rc = false;
+      break;
+  }
+
+  if (!rc) {
+    return Factory::null_value();
+  }

-Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
-  FixedArray* value =
-      FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
-  return  
Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex)));
+  Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
+  // The captures come in (start, end+1) pairs.
+  for (int i = 0; i < 2 * (num_captures+1); i += 2) {
+    array->set(i, Smi::FromInt(offsets_vector[i]));
+    array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
+  }
+  return Factory::NewJSArrayWithElements(array);
  }


@@ -3475,7 +3610,8 @@
                                           RegExpNode** node_return,
                                           bool ignore_case,
                                           bool is_multiline,
-                                         Handle<String> pattern) {
+                                         Handle<String> pattern,
+                                         bool is_ascii) {
    RegExpCompiler compiler(input->capture_count, ignore_case);
    // Wrap the body of the regexp in capture #0.
    RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
@@ -3500,10 +3636,6 @@
    NodeInfo info = *node->info();
    node = node->EnsureExpanded(&info);

-  if (!FLAG_irregexp) {
-    return Handle<FixedArray>::null();
-  }
-
    if (is_multiline && !FLAG_attempt_multiline_irregexp) {
      return Handle<FixedArray>::null();
    }
@@ -3512,7 +3644,13 @@
  #ifdef ARM
      // Unimplemented, fall-through to bytecode implementation.
  #else  // IA32
-    RegExpMacroAssemblerIA32  
macro_assembler(RegExpMacroAssemblerIA32::UC16,
+    RegExpMacroAssemblerIA32::Mode mode;
+    if (is_ascii) {
+      mode = RegExpMacroAssemblerIA32::ASCII;
+    } else {
+      mode = RegExpMacroAssemblerIA32::UC16;
+    }
+    RegExpMacroAssemblerIA32 macro_assembler(mode,
                                               (input->capture_count + 1) *  
2);
      return compiler.Assemble(&macro_assembler,
                               node,

Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h       (original)
+++ branches/bleeding_edge/src/jsregexp.h       Mon Dec  8 04:43:01 2008
@@ -48,6 +48,9 @@
    // This function calls the garbage collector if necessary.
    static Handle<String> ToString(Handle<Object> value);

+  // Parses the RegExp pattern and prepares the JSRegExp object with
+  // generic data and choice of implementation - as well as what
+  // the implementation wants to store in the data field.
    static Handle<Object> Compile(Handle<JSRegExp> re,
                                  Handle<String> pattern,
                                  Handle<String> flags);
@@ -71,12 +74,10 @@
                                       Handle<String> pattern,
                                       JSRegExp::Flags flags);

-  // Stores a compiled RegExp pattern in the JSRegExp object.
-  // The pattern is compiled by Irregexp.
+  // Prepares a JSRegExp object with Irregexp-specific data.
    static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
                                          Handle<String> pattern,
-                                        JSRegExp::Flags flags,
-                                        Handle<FixedArray> irregexp_data);
+                                        JSRegExp::Flags flags);


    // Compile the pattern using JSCRE and store the result in the
@@ -140,9 +141,10 @@
    static int JscreNumberOfCaptures(Handle<JSRegExp> re);
    static ByteArray* JscreInternal(Handle<JSRegExp> re);

-  static int IrregexpNumberOfCaptures(Handle<JSRegExp> re);
-  static int IrregexpNumberOfRegisters(Handle<JSRegExp> re);
-  static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re);
+  static int IrregexpNumberOfCaptures(Handle<FixedArray> re);
+  static int IrregexpNumberOfRegisters(Handle<FixedArray> re);
+  static Handle<ByteArray> IrregexpByteCode(Handle<FixedArray> re);
+  static Handle<Code> IrregexpNativeCode(Handle<FixedArray> re);

    // Call jsRegExpExecute once
    static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
@@ -153,7 +155,7 @@
                                        int* ovector,
                                        int ovector_length);

-  static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp,
+  static Handle<Object> IrregexpExecOnce(Handle<FixedArray> regexp,
                                           int num_captures,
                                           Handle<String> subject16,
                                           int previous_index,
@@ -1082,7 +1084,9 @@
                                      RegExpNode** node_return,
                                      bool ignore_case,
                                      bool multiline,
-                                    Handle<String> pattern);
+                                    Handle<String> pattern,
+                                    bool is_ascii);
+
    static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
  };


Modified: branches/bleeding_edge/src/objects.h
==============================================================================
--- branches/bleeding_edge/src/objects.h        (original)
+++ branches/bleeding_edge/src/objects.h        Mon Dec  8 04:43:01 2008
@@ -2924,7 +2924,7 @@
    // ATOM: A simple string to match against using an indexOf operation.
    // IRREGEXP: Compiled with Irregexp.
    // IRREGEXP_NATIVE: Compiled to native code with Irregexp.
-  enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP, IRREGEXP_NATIVE };
+  enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP };
    enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };

    class Flags {

Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   Mon Dec  8 04:43:01 2008
@@ -111,9 +111,10 @@


  void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
-  ASSERT(by > 0);
-  Label inside_string;
-  __ add(Operand(edi), Immediate(by * char_size()));
+  if (by != 0) {
+    Label inside_string;
+    __ add(Operand(edi), Immediate(by * char_size()));
+  }
  }


@@ -138,7 +139,7 @@
  void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
                                             Label* bitmap,
                                             Label* on_zero) {
-  UNREACHABLE();
+  UNIMPLEMENTED();
    __ mov(eax, current_character());
    __ sub(Operand(eax), Immediate(start));
    __ cmp(eax, 64);  // FIXME: 64 = length_of_bitmap_in_bits.
@@ -683,6 +684,8 @@
                                                           int byte_offset1,
                                                           int byte_offset2,
                                                           size_t byte_length) {
+  // This function MUST NOT cause a garbage collection. A GC might move
+  // the calling generated code and invalidate the stacked return address.
    ASSERT(byte_length % 2 == 0);
    Address buffer_address = reinterpret_cast<Address>(*buffer);
    uc16* substring1 = reinterpret_cast<uc16*>(buffer_address + byte_offset1);

Modified: branches/bleeding_edge/test/cctest/test-regexp.cc
==============================================================================
--- branches/bleeding_edge/test/cctest/test-regexp.cc   (original)
+++ branches/bleeding_edge/test/cctest/test-regexp.cc   Mon Dec  8 04:43:01 2008
@@ -355,7 +355,7 @@
  }


-static RegExpNode* Compile(const char* input, bool multiline) {
+static RegExpNode* Compile(const char* input, bool multiline, bool is_ascii) {
    V8::Initialize(NULL);
    FlatStringReader reader(CStrVector(input));
    RegExpParseResult result;
@@ -363,17 +363,18 @@
      return NULL;
    RegExpNode* node = NULL;
    Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
-  RegExpEngine::Compile(&result, &node, false, multiline, pattern);
+  RegExpEngine::Compile(&result, &node, false, multiline, pattern, is_ascii);
    return node;
  }


  static void Execute(const char* input,
                      bool multiline,
+                    bool is_ascii,
                      bool dot_output = false) {
    v8::HandleScope scope;
    ZoneScope zone_scope(DELETE_ON_EXIT);
-  RegExpNode* node = Compile(input, multiline);
+  RegExpNode* node = Compile(input, multiline, is_ascii);
    USE(node);
  #ifdef DEBUG
    if (dot_output) {
@@ -1130,7 +1131,7 @@
  TEST(SimplePropagation) {
    v8::HandleScope scope;
    ZoneScope zone_scope(DELETE_ON_EXIT);
-  RegExpNode* node = Compile("(a|^b|c)", false);
+  RegExpNode* node = Compile("(a|^b|c)", false, true);
    CHECK(node->info()->follows_start_interest);
  }

@@ -1300,5 +1301,5 @@

  TEST(Graph) {
    V8::Initialize(NULL);
-  Execute("(?=[d#.])", false, true);
+  Execute("(?=[d#.])", false, true, true);
  }

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to