Author: [EMAIL PROTECTED]
Date: Wed Nov 19 01:37:50 2008
New Revision: 790

Modified:
    branches/experimental/regexp2000/src/assembler-re2k-inl.h
    branches/experimental/regexp2000/src/assembler-re2k.cc
    branches/experimental/regexp2000/src/assembler-re2k.h
    branches/experimental/regexp2000/src/bytecodes-re2k.h
    branches/experimental/regexp2000/src/interpreter-re2k.cc
    branches/experimental/regexp2000/src/interpreter-re2k.h
    branches/experimental/regexp2000/src/jsregexp-inl.h
    branches/experimental/regexp2000/src/jsregexp.cc
    branches/experimental/regexp2000/src/jsregexp.h
    branches/experimental/regexp2000/src/parser.cc
    branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
    branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
    branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
    branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h
    branches/experimental/regexp2000/src/regexp-macro-assembler.h
    branches/experimental/regexp2000/test/cctest/test-regexp.cc
    branches/experimental/regexp2000/test/mjsunit/regexp.js
    branches/experimental/regexp2000/test/mjsunit/unicode-test.js

Log:
* No failures on our own tests.
* 26 failures on Mozilla tests.
* Remember to include linebreaks in \s
* Interpreter takes flat 16 bit strings as input.
* Remove dubious test from mjsunit/regexp.js
   (http://code.google.com/p/v8/issues/detail?id=152)
* Add debugging help (off by default) to unicode-test.js
* The regexp-macro-assembler interface now has the concept
   of a current_character register.
* Removed CheckCharacterClass from regexp-macro-assembler
   (too high level an operation for this level).
* Introduce CheckCharacterLT and CheckCharacterGT to the
   macro assembler interface.
* Make the re2k assembler use a growable instruction buffer
   to eliminate an arbitrary size limit.
* Add --trace-regexp-bytecodes option to debug build.
* Make RegExpNode::GoTo virtual so the backtrack node can
   just inline itself.
* Add protected RegExpNode::Bind() that subclasses use when
   emitting their code.
* Limit max recursion in Emit stage to avoid stack overflow.
* Remember to reserve at least 2 registers for 0th capture.
* Bail out to JSCRE when encountering \b, ^, $.
* Fix code emission and implement guards on ChoiceNode.
   (Still doesn't use dispatch table).
* Implement code emission for TextNode.
* Remember to set up backtrack when writing capture indices
   to capture registers so they can be unwound if necessary.
* DispatchTableConstructor::VisitBackreference isn't yet
   implemented, but we don't crash the VM.  (Later we
   discover the regexp has backreferences and defer to JSCRE).
* \b in a character class means backspace.

Review URL: http://codereview.chromium.org/11228

Modified: branches/experimental/regexp2000/src/assembler-re2k-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k-inl.h   (original)
+++ branches/experimental/regexp2000/src/assembler-re2k-inl.h   Wed Nov 19  
01:37:50 2008
@@ -38,17 +38,29 @@


  void Re2kAssembler::Emit(uint32_t byte) {
+  ASSERT(pc_ <= buffer_.length());
+  if (pc_ == buffer_.length()) {
+    Expand();
+  }
    buffer_[pc_++] = byte;
  }


  void Re2kAssembler::Emit16(uint32_t word) {
+  ASSERT(pc_ <= buffer_.length());
+  if (pc_ + 1 >= buffer_.length()) {
+    Expand();
+  }
    Store16(buffer_.start() + pc_, word);
    pc_ += 2;
  }


  void Re2kAssembler::Emit32(uint32_t word) {
+  ASSERT(pc_ <= buffer_.length());
+  if (pc_ + 3 >= buffer_.length()) {
+    Expand();
+  }
    Store32(buffer_.start() + pc_, word);
    pc_ += 4;
  }

Modified: branches/experimental/regexp2000/src/assembler-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k.cc      (original)
+++ branches/experimental/regexp2000/src/assembler-re2k.cc      Wed Nov 19  
01:37:50 2008
@@ -47,6 +47,9 @@


  Re2kAssembler::~Re2kAssembler() {
+  if (own_buffer_) {
+    buffer_.Dispose();
+  }
  }


@@ -173,22 +176,17 @@
  }


-void Re2kAssembler::CheckRange(uc16 start, uc16 end, Label* on_mismatch) {
-  if (start == end) {
-    CheckChar(start, on_mismatch);
-  }
-  Emit(BC_CHECK_RANGE);
-  Emit16(start);
-  Emit16(end);
-  EmitOrLink(on_mismatch);
+void Re2kAssembler::CheckCharacterLT(uc16 limit, Label* on_less) {
+  Emit(BC_CHECK_LT);
+  Emit16(limit);
+  EmitOrLink(on_less);
  }


-void Re2kAssembler::CheckNotRange(uc16 start, uc16 end, Label* on_match) {
-  Emit(BC_CHECK_NOT_RANGE);
-  Emit16(start);
-  Emit16(end);
-  EmitOrLink(on_match);
+void Re2kAssembler::CheckCharacterGT(uc16 limit, Label* on_greater) {
+  Emit(BC_CHECK_GT);
+  Emit16(limit);
+  EmitOrLink(on_greater);
  }


@@ -284,5 +282,18 @@
  void Re2kAssembler::Copy(Address a) {
    memcpy(a, buffer_.start(), length());
  }
+
+
+void Re2kAssembler::Expand() {
+  bool old_buffer_was_our_own = own_buffer_;
+  Vector<byte> old_buffer = buffer_;
+  buffer_ = Vector<byte>::New(old_buffer.length() * 2);
+  own_buffer_ = true;
+  memcpy(buffer_.start(), old_buffer.start(), old_buffer.length());
+  if (old_buffer_was_our_own) {
+    old_buffer.Dispose();
+  }
+}
+

  } }  // namespace v8::internal

Modified: branches/experimental/regexp2000/src/assembler-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k.h       (original)
+++ branches/experimental/regexp2000/src/assembler-re2k.h       Wed Nov 19  
01:37:50 2008
@@ -60,11 +60,11 @@
    void CheckChar(uc16 c, Label* on_mismatch);
    void CheckNotChar(uc16 c, Label* on_match);

-  // Checks current char register against a range.
-  void CheckRange(uc16 start, uc16 end, Label* on_mismatch);
-  void CheckNotRange(uc16 start, uc16 end, Label* on_match);
+  // Used to check current char register against a range.
+  void CheckCharacterLT(uc16 limit, Label* on_less);
+  void CheckCharacterGT(uc16 limit, Label* on_greater);

-  // Checks current position (plus optional offset) for a match against a
+  // Checks current position for a match against a
    // previous capture.  Advances current position by the length of the  
capture
    // iff it matches.  The capture is stored in a given register and the
    // the register after.  If a register contains -1 then the other register
@@ -122,6 +122,8 @@

    // True if the assembler owns the buffer, false if buffer is external.
    bool own_buffer_;
+
+  void Expand();
  };



Modified: branches/experimental/regexp2000/src/bytecodes-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/bytecodes-re2k.h       (original)
+++ branches/experimental/regexp2000/src/bytecodes-re2k.h       Wed Nov 19  
01:37:50 2008
@@ -49,8 +49,8 @@
  V(LOAD_CURRENT_CHAR, 14, 9) /* load offset32  
addr32                         */ \
  V(CHECK_CHAR,        15, 7) /* check_char uc16  
addr32                       */ \
  V(CHECK_NOT_CHAR,    16, 7) /* check_not_char uc16  
addr32                   */ \
-V(CHECK_RANGE,       17, 9) /* check_range uc16 uc16  
addr32                 */ \
-V(CHECK_NOT_RANGE,   18, 9) /* check_not_range uc16 uc16  
addr32             */ \
+V(CHECK_LT,          17, 7) /* check_lt uc16  
addr32                         */ \
+V(CHECK_GT,          18, 7) /* check_gr uc16  
addr32                         */ \
  V(CHECK_BACKREF,     19, 9) /* check_backref offset32 capture_idx  
addr32    */ \
  V(CHECK_NOT_BACKREF, 20, 9) /* check_not_backref offset32 capture_idx  
addr32*/ \
  V(LOOKUP_MAP1,       21, 11) /* l_map1 start16 bit_map_addr32  
addr32        */ \
@@ -64,6 +64,11 @@
    static const int BC_##name = code;
  BYTECODE_ITERATOR(DECLARE_BYTECODES)
  #undef DECLARE_BYTECODES
+
+#define DECLARE_BYTECODE_LENGTH(name, code, length) \
+  static const int BC_##name##_LENGTH = length;
+BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
+#undef DECLARE_BYTECODE_LENGTH
  } }

  #endif  // V8_BYTECODES_IA32_H_

Modified: branches/experimental/regexp2000/src/interpreter-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/interpreter-re2k.cc    (original)
+++ branches/experimental/regexp2000/src/interpreter-re2k.cc    Wed Nov 19  
01:37:50 2008
@@ -39,15 +39,35 @@


  #ifdef DEBUG
-# define BYTECODE(name) break;                                            \
-                        case BC_##name:                                   \
-                          if (FLAG_trace_regexp_bytecodes) {              \
-                            PrintF("pc = %d, current = %d, bc = "         \
-                                    #name "\n", pc - code_base, current); \
-                          }
+static void TraceInterpreter(const byte* code_base,
+                             const byte* pc,
+                             int stack_depth,
+                             int current_position,
+                             int bytecode_length,
+                             const char* bytecode_name) {
+  if (FLAG_trace_regexp_bytecodes) {
+    PrintF("pc = %02x, sp = %d, current = %d, bc = %s",
+            pc - code_base,
+            stack_depth,
+            current_position,
+            bytecode_name);
+    for (int i = 1; i < bytecode_length; i++) {
+      printf(", %02x", pc[i]);
+    }
+    printf("\n");
+  }
+}
+
+
+# define BYTECODE(name) case  
BC_##name:                                       \
+                           
TraceInterpreter(code_base,                         \
+                                            
pc,                                \
+                                           backtrack_sp -  
backtrack_stack,    \
+                                            
current,                           \
+                                            
BC_##name##_LENGTH,                \
+                                           #name);
  #else
-# define BYTECODE(name) break;                                            \
-                        case BC_##name:
+# define BYTECODE(name) case BC_##name:
  #endif


@@ -57,8 +77,8 @@
                       int* registers,
                       int current) {
    const byte* pc = code_base;
-  int backtrack_stack[1000];
-  int backtrack_stack_space = 1000;
+  int backtrack_stack[10000];
+  int backtrack_stack_space = 10000;
    int* backtrack_sp = backtrack_stack;
    int current_char = -1;
  #ifdef DEBUG
@@ -76,106 +96,122 @@
            return false;  // No match on backtrack stack overflow.
          }
          *backtrack_sp++ = current + Load32(pc + 1);
-        pc += 5;
+        pc += BC_PUSH_CP_LENGTH;
+        break;
        BYTECODE(PUSH_BT)
          if (--backtrack_stack_space < 0) {
            return false;  // No match on backtrack stack overflow.
          }
          *backtrack_sp++ = Load32(pc + 1);
-        pc += 5;
+        pc += BC_PUSH_BT_LENGTH;
+        break;
        BYTECODE(PUSH_REGISTER)
          if (--backtrack_stack_space < 0) {
            return false;  // No match on backtrack stack overflow.
          }
          *backtrack_sp++ = registers[pc[1]];
-        pc += 2;
+        pc += BC_PUSH_REGISTER_LENGTH;
+        break;
        BYTECODE(SET_REGISTER)
          registers[pc[1]] = Load32(pc + 2);
-        pc += 6;
+        pc += BC_SET_REGISTER_LENGTH;
+        break;
        BYTECODE(ADVANCE_REGISTER)
          registers[pc[1]] += Load32(pc + 2);
-        pc += 6;
+        pc += BC_ADVANCE_REGISTER_LENGTH;
+        break;
        BYTECODE(SET_REGISTER_TO_CP)
          registers[pc[1]] = current + Load32(pc + 2);
-        pc += 6;
+        pc += BC_SET_REGISTER_TO_CP_LENGTH;
+        break;
        BYTECODE(POP_CP)
          backtrack_stack_space++;
          --backtrack_sp;
          current = *backtrack_sp;
-        pc += 1;
+        pc += BC_POP_CP_LENGTH;
+        break;
        BYTECODE(POP_BT)
          backtrack_stack_space++;
          --backtrack_sp;
          pc = code_base + *backtrack_sp;
+        break;
        BYTECODE(POP_REGISTER)
          backtrack_stack_space++;
          --backtrack_sp;
          registers[pc[1]] = *backtrack_sp;
-        pc += 2;
+        pc += BC_POP_REGISTER_LENGTH;
+        break;
        BYTECODE(FAIL)
          return false;
        BYTECODE(SUCCEED)
          return true;
        BYTECODE(ADVANCE_CP)
          current += Load32(pc + 1);
-        pc += 5;
+        pc += BC_ADVANCE_CP_LENGTH;
+        break;
        BYTECODE(GOTO)
          pc = code_base + Load32(pc + 1);
+        break;
        BYTECODE(LOAD_CURRENT_CHAR) {
          int pos = current + Load32(pc + 1);
          if (pos >= subject.length()) {
            pc = code_base + Load32(pc + 5);
          } else {
            current_char = subject[pos];
-          pc += 9;
+          pc += BC_LOAD_CURRENT_CHAR_LENGTH;
          }
+        break;
        }
        BYTECODE(CHECK_CHAR) {
          int c = Load16(pc + 1);
          if (c != current_char) {
            pc = code_base + Load32(pc + 3);
          } else {
-          pc += 7;
+          pc += BC_CHECK_CHAR_LENGTH;
          }
+        break;
        }
        BYTECODE(CHECK_NOT_CHAR) {
          int c = Load16(pc + 1);
          if (c == current_char) {
            pc = code_base + Load32(pc + 3);
          } else {
-          pc += 7;
+          pc += BC_CHECK_NOT_CHAR_LENGTH;
          }
+        break;
        }
-      BYTECODE(CHECK_RANGE) {
-        int start = Load16(pc + 1);
-        int end = Load16(pc + 3);
-        if (current_char < start || current_char > end) {
-          pc = code_base + Load32(pc + 5);
+      BYTECODE(CHECK_LT) {
+        int limit = Load16(pc + 1);
+        if (current_char < limit) {
+          pc = code_base + Load32(pc + 3);
          } else {
-          pc += 9;
+          pc += BC_CHECK_LT_LENGTH;
          }
+        break;
        }
-      BYTECODE(CHECK_NOT_RANGE) {
-        int start = Load16(pc + 1);
-        int end = Load16(pc + 3);
-        if (current_char >= start && current_char <= end) {
-          pc = code_base + Load32(pc + 5);
+      BYTECODE(CHECK_GT) {
+        int limit = Load16(pc + 1);
+        if (current_char > limit) {
+          pc = code_base + Load32(pc + 3);
          } else {
-          pc += 9;
+          pc += BC_CHECK_GT_LENGTH;
          }
+        break;
        }
        BYTECODE(CHECK_REGISTER_LT)
          if (registers[pc[1]] < Load16(pc + 2)) {
            pc = code_base + Load32(pc + 4);
          } else {
-          pc += 8;
+          pc += BC_CHECK_REGISTER_LT_LENGTH;
          }
+        break;
        BYTECODE(CHECK_REGISTER_GE)
          if (registers[pc[1]] >= Load16(pc + 2)) {
            pc = code_base + Load32(pc + 4);
          } else {
-          pc += 8;
+          pc += BC_CHECK_REGISTER_GE_LENGTH;
          }
+        break;
        BYTECODE(LOOKUP_MAP1) {
          // Look up character in a bitmap.  If we find a 0, then jump to the
          // location at pc + 7.  Otherwise fall through!
@@ -185,8 +221,9 @@
          if (map == 0) {
            pc = code_base + Load32(pc + 7);
          } else {
-          pc += 11;
+          pc += BC_LOOKUP_MAP1_LENGTH;
          }
+        break;
        }
        BYTECODE(LOOKUP_MAP2) {
          // Look up character in a half-nibble map.  If we find 00, then  
jump to
@@ -208,6 +245,7 @@
              pc = code_base + Load32(pc + 19);
            }
          }
+        break;
        }
        BYTECODE(LOOKUP_MAP8) {
          // Look up character in a byte map.  Use the byte as an index into  
a
@@ -216,6 +254,7 @@
          byte map = code_base[Load32(pc + 3) + index];
          const byte* new_pc = code_base + Load32(pc + 7) + (map << 2);
          pc = code_base + Load32(new_pc);
+        break;
        }
        BYTECODE(LOOKUP_HI_MAP8) {
          // Look up high byte of this character in a byte map.  Use the  
byte as
@@ -224,12 +263,14 @@
          byte map = code_base[Load32(pc + 2) + index];
          const byte* new_pc = code_base + Load32(pc + 6) + (map << 2);
          pc = code_base + Load32(new_pc);
+        break;
        }
        BYTECODE(CHECK_BACKREF)
          UNREACHABLE();
+        break;
        BYTECODE(CHECK_NOT_BACKREF)
          UNREACHABLE();
-        break;  // Last one doesn't have break in macro.
+        break;
        default:
          UNREACHABLE();
          break;
@@ -239,15 +280,18 @@


  bool Re2kInterpreter::Match(Handle<ByteArray> code_array,
-                            Handle<String> subject,
+                            Handle<String> subject16,
                              int* registers,
                              int start_position) {
+  ASSERT(StringShape(*subject16).IsTwoByteRepresentation());
+  ASSERT(subject16->IsFlat(StringShape(*subject16)));
+
+
+  AssertNoAllocation a;
    const byte* code_base = code_array->GetDataStartAddress();
-  ASSERT(subject->IsFlat(StringShape(*subject)));
-  Handle<String> flat_two_byte =  
RegExpImpl::CachedStringToTwoByte(subject);
-  ASSERT(StringShape(*flat_two_byte).IsTwoByteRepresentation());
    return RawMatch(code_base,
-                  flat_two_byte->ToUC16Vector(),
+                  Vector<const uc16>(subject16->GetTwoByteData(),
+                                     subject16->length()),
                    registers,
                    start_position);
  }

Modified: branches/experimental/regexp2000/src/interpreter-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/interpreter-re2k.h     (original)
+++ branches/experimental/regexp2000/src/interpreter-re2k.h     Wed Nov 19  
01:37:50 2008
@@ -36,7 +36,7 @@
  class Re2kInterpreter {
   public:
    static bool Match(Handle<ByteArray> code,
-                    Handle<String> subject,
+                    Handle<String> subject16,
                      int* captures,
                      int start_position);
  };

Modified: branches/experimental/regexp2000/src/jsregexp-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp-inl.h (original)
+++ branches/experimental/regexp2000/src/jsregexp-inl.h Wed Nov 19 01:37:50  
2008
@@ -30,6 +30,7 @@


  #include "jsregexp.h"
+#include "regexp-macro-assembler.h"


  namespace v8 {
@@ -250,6 +251,11 @@
    DoForEach<Node, Callback>(node->left(), callback);
    callback->Call(node->key(), node->value());
    DoForEach<Node, Callback>(node->right(), callback);
+}
+
+
+void RegExpNode::Bind(RegExpMacroAssembler* macro) {
+  macro->Bind(&label_);
  }



Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc    (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc    Wed Nov 19 01:37:50  
2008
@@ -460,25 +460,31 @@

  Handle<Object> RegExpImpl::Re2kExecOnce(Handle<JSRegExp> regexp,
                                          int num_captures,
-                                        Handle<String> subject,
+                                        Handle<String> two_byte_subject,
                                          int previous_index,
-                                        const uc16* two_byte_subject,
                                          int* offsets_vector,
                                          int offsets_vector_length) {
+#ifdef DEBUG
+  if (FLAG_trace_regexp_bytecodes) {
+    String* pattern = regexp->Pattern();
+    PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString()));
+    PrintF("\n\nSubject string: '%s'\n\n",  
*(two_byte_subject->ToCString()));
+  }
+#endif
+  ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
+  ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
    bool rc;
    {
      for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
        offsets_vector[i] = -1;
      }

-    AssertNoAllocation a;
-
-    LOG(RegExpExecEvent(regexp, previous_index, subject));
+    LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));

      Handle<ByteArray> byte_codes = Re2kCode(regexp);

      rc = Re2kInterpreter::Match(byte_codes,
-                                subject,
+                                two_byte_subject,
                                  offsets_vector,
                                  previous_index);
    }
@@ -605,13 +611,13 @@

    Handle<String> subject16 = CachedStringToTwoByte(subject);

-  Handle<Object> result(Re2kExecOnce(regexp,
-                                     num_captures,
-                                     subject,
-                                     previous_index,
-                                     subject16->GetTwoByteData(),
-                                     offsets.vector(),
-                                     offsets.length()));
+  Handle<Object> result(
+      Re2kExecOnce(regexp,
+                   num_captures,
+                   subject16,
+                   previous_index,
+                   offsets.vector(),
+                   offsets.length()));
    return result;
  }

@@ -671,9 +677,8 @@
      } else {
        matches = Re2kExecOnce(regexp,
                               Re2kNumberOfCaptures(regexp),
-                             subject,
+                             subject16,
                               previous_index,
-                             subject16->GetTwoByteData(),
                               offsets.vector(),
                               offsets.length());

@@ -845,11 +850,17 @@
    EndNode* accept() { return accept_; }
    EndNode* backtrack() { return backtrack_; }

+  static const int kMaxRecursion = 100;
+  inline int recursion_depth() { return recursion_depth_; }
+  inline void IncrementRecursionDepth() { recursion_depth_++; }
+  inline void DecrementRecursionDepth() { recursion_depth_--; }
+
   private:
    EndNode* accept_;
    EndNode* backtrack_;
    int next_register_;
    List<RegExpNode*>* work_list_;
+  int recursion_depth_;
    RegExpMacroAssembler* macro_assembler_;
  };

@@ -857,8 +868,9 @@
  // Attempts to compile the regexp using a Regexp2000 code generator.   
Returns
  // a fixed array or a null handle depending on whether it succeeded.
  RegExpCompiler::RegExpCompiler(int capture_count)
-  : next_register_(2 * capture_count),
-    work_list_(NULL) {
+  : next_register_(2 * (capture_count + 1)),
+    work_list_(NULL),
+    recursion_depth_(0) {
    accept_ = new EndNode(EndNode::ACCEPT);
    backtrack_ = new EndNode(EndNode::BACKTRACK);
  }
@@ -880,7 +892,7 @@
      return Handle<FixedArray>::null();
    }
    while (!work_list.is_empty()) {
-    if (!work_list.RemoveLast()->Emit(this)) {
+    if (!work_list.RemoveLast()->GoTo(this)) {
        fail.Unuse();
        return Handle<FixedArray>::null();
      }
@@ -903,27 +915,66 @@


  bool RegExpNode::GoTo(RegExpCompiler* compiler) {
+  // TODO(erikcorry): Implement support.
+  if (info_.follows_word_interest ||
+      info_.follows_newline_interest ||
+      info_.follows_start_interest) {
+    return false;
+  }
    if (label_.is_bound()) {
      compiler->macro_assembler()->GoTo(&label_);
      return true;
    } else {
-    return Emit(compiler);
+    if (compiler->recursion_depth() > RegExpCompiler::kMaxRecursion) {
+      compiler->macro_assembler()->GoTo(&label_);
+      compiler->AddWork(this);
+      return true;
+    } else {
+      compiler->IncrementRecursionDepth();
+      bool how_it_went = Emit(compiler);
+      compiler->DecrementRecursionDepth();
+      return how_it_went;
+    }
    }
  }


+bool EndNode::GoTo(RegExpCompiler* compiler) {
+  if (info()->follows_word_interest ||
+      info()->follows_newline_interest ||
+      info()->follows_start_interest) {
+    return false;
+  }
+  if (!label()->is_bound()) {
+    Bind(compiler->macro_assembler());
+  }
+  switch (action_) {
+    case ACCEPT:
+      compiler->macro_assembler()->Succeed();
+    break;
+    case BACKTRACK:
+      compiler->macro_assembler()->Backtrack();
+    break;
+  }
+  return true;
+}
+
+
  Label* RegExpNode::label() {
    return &label_;
  }


  bool EndNode::Emit(RegExpCompiler* compiler) {
+  RegExpMacroAssembler* macro = compiler->macro_assembler();
    switch (action_) {
      case ACCEPT:
-      compiler->macro_assembler()->Succeed();
+      Bind(macro);
+      macro->Succeed();
        return true;
      case BACKTRACK:
-      compiler->macro_assembler()->Backtrack();
+      Bind(macro);
+      macro->Backtrack();
        return true;
    }
    return false;
@@ -995,47 +1046,136 @@
  // Emit code.


-void ChoiceNode::GenerateGuard(RegExpCompiler* compiler,
-                               Guard *guard,
+void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
+                               Guard* guard,
                                 Label* on_failure) {
+  switch (guard->op()) {
+    case Guard::LT:
+      macro_assembler->IfRegisterGE(guard->reg(), guard->value(),  
on_failure);
+      break;
+    case Guard::GEQ:
+      macro_assembler->IfRegisterLT(guard->reg(), guard->value(),  
on_failure);
+      break;
+  }
+}
+
+
+bool TextNode::Emit(RegExpCompiler* compiler) {
+  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+  Bind(macro_assembler);
+  int element_count = elms_->length();
+  int cp_offset = 0;
+  for (int i = 0; i < element_count; i++) {
+    TextElement elm = (*elms_)[i];
+    switch (elm.type) {
+      case TextElement::ATOM: {
+        Vector<const uc16> quarks = elm.data.u_atom->data();
+        macro_assembler->CheckCharacters(quarks,
+                                         cp_offset,
+                                         on_failure_->label());
+        cp_offset += quarks.length();
+        break;
+      }
+      case TextElement::CHAR_CLASS: {
+        RegExpCharacterClass* cc = elm.data.u_char_class;
+        if (cc->is_negated()) return false;
+        macro_assembler->LoadCurrentCharacter(cp_offset,  
on_failure_->label());
+        cp_offset++;
+
+        ZoneList<CharacterRange>* ranges = cc->ranges();
+
+        Label found;
+
+        int range_count = ranges->length();
+
+        if (range_count == 0) {
+          on_failure()->GoTo(compiler);
+          break;
+        }
+
+        for (int i = 0; i < range_count - 1; i++) {
+          CharacterRange& range = (*ranges)[i];
+          Label next_range;
+          uc16 from = range.from();
+          uc16 to = range.to();
+          if (from != 0) {
+            macro_assembler->CheckCharacterLT(from, &next_range);
+          }
+          if (to != 0xffff) {
+            macro_assembler->CheckCharacterLT(to + 1, &found);
+          } else {
+            macro_assembler->AdvanceCurrentPosition(1);
+            on_success()->GoTo(compiler);
+          }
+          macro_assembler->Bind(&next_range);
+        }
+
+        CharacterRange& range = (*ranges)[range_count - 1];
+        uc16 from = range.from();
+        uc16 to = range.to();
+        if (from != 0) {
+          macro_assembler->CheckCharacterLT(from, on_failure_->label());
+        }
+        if (to != 0xffff) {
+          macro_assembler->CheckCharacterGT(to, on_failure_->label());
+        }
+        compiler->AddWork(on_failure_);
+        macro_assembler->Bind(&found);
+        break;
+      }
+      default:
+        UNREACHABLE();
+        return false;
+    }
+  }
+  macro_assembler->AdvanceCurrentPosition(cp_offset);
+  return on_success()->GoTo(compiler);
  }


  bool ChoiceNode::Emit(RegExpCompiler* compiler) {
    int choice_count = alternatives_->length();
    RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+  Bind(macro_assembler);
    // For now we just call all choices one after the other.  The idea  
ultimately
    // is to use the Dispatch table to try only the relevant ones.
-  for (int i = 0; i < choice_count; i++) {
+  int i;
+  for (i = 0; i < choice_count - 1; i++) {
      GuardedAlternative alternative = (*alternatives_)[i];
      Label after;
-    Label* next_alternative;
-    if (i < choice_count - 1) {
-      next_alternative = &after;
-    } else {
-      next_alternative = on_failure_->label();
-    }
+    Label after_no_pop_cp;
      ZoneList<Guard*>* guards = alternative.guards();
      if (guards != NULL) {
        int guard_count = guards->length();
        for (int j = 0; j < guard_count; j++) {
-        GenerateGuard(compiler, (*guards)[i], next_alternative);
+        GenerateGuard(macro_assembler, (*guards)[j], &after_no_pop_cp);
        }
      }
-    macro_assembler->PushBacktrack(next_alternative);
-    if (!alternative.node()->Emit(compiler)) {
+    macro_assembler->PushCurrentPosition();
+    macro_assembler->PushBacktrack(&after);
+    if (!alternative.node()->GoTo(compiler)) {
        after.Unuse();
-      if (next_alternative != &after) {
-        next_alternative->Unuse();
-      }
+      after_no_pop_cp.Unuse();
        return false;
      }
-    if (i < choice_count - 1) {
-      macro_assembler->Bind(&after);
-    } else {
-      after.Unuse();
+    macro_assembler->Bind(&after);
+    macro_assembler->PopCurrentPosition();
+    macro_assembler->Bind(&after_no_pop_cp);
+  }
+  GuardedAlternative alternative = (*alternatives_)[i];
+  ZoneList<Guard*>* guards = alternative.guards();
+  if (guards != NULL) {
+    int guard_count = guards->length();
+    for (int j = 0; j < guard_count; j++) {
+      GenerateGuard(macro_assembler, (*guards)[j], on_failure_->label());
      }
    }
+  if (!on_failure_->IsBacktrack()) {
+    macro_assembler->PushBacktrack(on_failure_->label());
+  }
+  if (!alternative.node()->GoTo(compiler)) {
+    return false;
+  }
    compiler->AddWork(on_failure_);
    return true;
  }
@@ -1043,20 +1183,44 @@

  bool ActionNode::Emit(RegExpCompiler* compiler) {
    RegExpMacroAssembler* macro = compiler->macro_assembler();
+  Bind(macro);
    switch (type_) {
      case STORE_REGISTER:
        macro->SetRegister(data_.u_store_register.reg,
                           data_.u_store_register.value);
        break;
-    case INCREMENT_REGISTER:
+    case INCREMENT_REGISTER: {
+      Label undo;
+      macro->PushBacktrack(&undo);
        macro->AdvanceRegister(data_.u_increment_register.reg, 1);
+      bool ok = on_success()->GoTo(compiler);
+      if (!ok) {
+        undo.Unuse();
+        return false;
+      }
+      macro->Bind(&undo);
+      macro->AdvanceRegister(data_.u_increment_register.reg, -1);
+      macro->Backtrack();
        break;
-    case STORE_POSITION:
-      macro->PushCurrentPosition();
+    }
+    case STORE_POSITION: {
+      Label undo;
+      macro->PushRegister(data_.u_position_register.reg);
+      macro->PushBacktrack(&undo);
+      macro->WriteCurrentPositionToRegister(data_.u_position_register.reg);
+      bool ok = on_success()->GoTo(compiler);
+      if (!ok) {
+        undo.Unuse();
+        return false;
+      }
+      macro->Bind(&undo);
+      macro->PopRegister(data_.u_position_register.reg);
+      macro->Backtrack();
        break;
+    }
      case RESTORE_POSITION:
-      macro->PopCurrentPosition();
-      break;
+      // TODO(erikcorry): Implement this.
+      return false;
      case BEGIN_SUBMATCH:
        // TODO(erikcorry): Implement this.
        return false;
@@ -1070,8 +1234,7 @@
        UNREACHABLE();
        return false;
    }
-  compiler->AddWork(on_success());
-  return true;
+  return on_success()->GoTo(compiler);
  }


@@ -1565,9 +1728,9 @@

  static const int kSpaceRangeCount = 20;
  static const uc16 kSpaceRanges[kSpaceRangeCount] = {
-  0x0009, 0x0009, 0x000B, 0x000C, 0x0020, 0x0020, 0x00A0, 0x00A0,
-  0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x202F, 0x202F,
-  0x205F, 0x205F, 0x3000, 0x3000
+  0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,
+  0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,
+  0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000
  };


@@ -1969,7 +2132,7 @@


  void DispatchTableConstructor::VisitBackreference(BackreferenceNode* that) {
-  UNIMPLEMENTED();
+  // TODO(plesner): What should this do?
  }


@@ -2055,7 +2218,7 @@
    if (node_return != NULL) *node_return = node;
    Analysis analysis;
    analysis.EnsureAnalyzed(node);
-  byte codes[10240];
+  byte codes[1024];
    Re2kAssembler assembler(Vector<byte>(codes, 1024));
    RegExpMacroAssemblerRe2k macro_assembler(&assembler);
    return compiler.Assemble(&macro_assembler,

Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h     (original)
+++ branches/experimental/regexp2000/src/jsregexp.h     Wed Nov 19 01:37:50 2008
@@ -30,6 +30,10 @@

  namespace v8 { namespace internal {

+
+class RegExpMacroAssembler;
+
+
  class RegExpImpl {
   public:
    // Creates a regular expression literal in the old space.
@@ -151,9 +155,8 @@

    static Handle<Object> Re2kExecOnce(Handle<JSRegExp> regexp,
                                       int num_captures,
-                                     Handle<String> subject,
+                                     Handle<String> subject16,
                                       int previous_index,
-                                     const uc16* utf8_subject,
                                       int* ovector,
                                       int ovector_length);

@@ -476,7 +479,7 @@


  class SiblingList {
-public:
+ public:
    SiblingList() : list_(NULL) { }
    int length() {
      return list_ == NULL ? 0 : list_->length();
@@ -489,7 +492,7 @@
    }
    void Add(RegExpNode* node) { list_->Add(node); }
    RegExpNode* Get(int index) { return list_->at(index); }
-private:
+ private:
    ZoneList<RegExpNode*>* list_;
  };

@@ -501,7 +504,7 @@
    // Generates a goto to this node or actually generates the code at this point.
    // Until the implementation is complete we will return true for success and
    // false for failure.
-  bool GoTo(RegExpCompiler* compiler);
+  virtual bool GoTo(RegExpCompiler* compiler);
    Label* label();

    // Until the implementation is complete we will return true for success and
@@ -513,6 +516,8 @@
    RegExpNode* GetSibling(NodeInfo* info);
    void EnsureSiblings() { siblings_.Ensure(this); }
    void AddSibling(RegExpNode* node) { siblings_.Add(node); }
+ protected:
+  inline void Bind(RegExpMacroAssembler* macro);
   private:
    Label label_;
    NodeInfo info_;
@@ -583,9 +588,9 @@
        on_failure_(on_failure),
        elms_(elms) { }
    virtual void Accept(NodeVisitor* visitor);
-  virtual bool Emit(RegExpCompiler* compiler) { return false; }
    virtual RegExpNode* PropagateInterest(NodeInfo* info);
    RegExpNode* on_failure() { return on_failure_; }
+  virtual bool Emit(RegExpCompiler* compiler);
    ZoneList<TextElement>* elements() { return elms_; }
   private:
    RegExpNode* on_failure_;
@@ -624,6 +629,7 @@
    virtual bool Emit(RegExpCompiler* compiler);
    virtual RegExpNode* PropagateInterest(NodeInfo* info);
    virtual bool IsBacktrack() { return action_ == BACKTRACK; }
+  virtual bool GoTo(RegExpCompiler* compiler);
   private:
    Action action_;
  };
@@ -678,7 +684,7 @@
    bool being_calculated() { return being_calculated_; }
    void set_being_calculated(bool b) { being_calculated_ = b; }
   private:
-  void GenerateGuard(RegExpCompiler* compiler,
+  void GenerateGuard(RegExpMacroAssembler* macro_assembler,
                       Guard *guard,
                       Label* on_failure);
    RegExpNode* on_failure_;

Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc      (original)
+++ branches/experimental/regexp2000/src/parser.cc      Wed Nov 19 01:37:50 2008
@@ -4025,6 +4025,9 @@
    ASSERT(has_next() && !IsSpecialClassEscape(next()));
    Advance();
    switch (current()) {
+    case 'b':
+      Advance();
+      return '\b';
      // ControlEscape :: one of
      //   f n r t v
      case 'f':

Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc (original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc Wed Nov 19 01:37:50 2008
@@ -121,8 +121,18 @@
  }


-void RegExpMacroAssemblerIA32::CheckCharacterClass(RegExpCharacterClass *cclass,
-                                                   Label* on_failure) {
+void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
+                                                    Label* on_end_of_input) {
+  UNREACHABLE();  // Not implemented.
+}
+
+
+void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
+  UNREACHABLE();  // Not implemented.
+}
+
+
+void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
    UNREACHABLE();  // Not implemented.
  }


Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h  (original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h  Wed Nov 19 01:37:50 2008
@@ -43,6 +43,9 @@
    virtual void AdvanceRegister(int reg, int by);  // r[reg] += by.
    virtual void Backtrack();
    virtual void Bind(Label* label);
+  virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
+
+

    // Check the current character against a bitmap.  The range of the current
    // character must be from start to start + length_of_bitmap_in_bits.

Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc (original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc Wed Nov 19 01:37:50 2008
@@ -123,46 +123,6 @@
  }


-static void TwoWayCharacterClass(
-    Re2kAssembler* assembler,
-    RegExpCharacterClass* char_class,
-    Label* on_match,
-    Label* on_mismatch) {
-  ZoneList<CharacterRange>* ranges = char_class->ranges();
-  int range_count = ranges->length();
-  if (!char_class->is_negated()) {
-    for (int i = 0; i < range_count; i++) {
-      CharacterRange& range = ranges->at(i);
-      assembler->CheckRange(range.from(), range.to(), on_match);
-    }
-    if (on_mismatch == NULL) {
-      assembler->PopBacktrack();
-    } else {
-      assembler->GoTo(on_mismatch);
-    }
-  } else {  // range is negated.
-    if (range_count == 0) {
-      assembler->GoTo(on_match);
-    } else {
-      CharacterRange& previous = ranges->at(0);
-      if (previous.from() > 0) {
-        assembler->CheckRange(0, previous.from() - 1, on_match);
-      }
-      for (int i = 1; i < range_count; i++) {
-        CharacterRange& range = ranges->at(i);
-        if (previous.to() < range.from() - 1) {
-          assembler->CheckRange(previous.to() + 1, range.from() - 1, on_match);
-        }
-        previous = range;
-      }
-      if (previous.to() < 65535) {
-        assembler->CheckRange(previous.to() + 1, 65535, on_match);
-      }
-    }
-  }
-}
-
-
  void RegExpMacroAssemblerRe2k::CheckCurrentPosition(
    int register_index,
    Label* on_equal) {
@@ -171,23 +131,21 @@
  }


-void RegExpMacroAssemblerRe2k::CheckCharacterClass(
-    RegExpCharacterClass* char_class,
-    int cp_offset,
-    Label* on_failure) {
+void RegExpMacroAssemblerRe2k::LoadCurrentCharacter(int cp_offset,
+                                                    Label* on_failure) {
    assembler_->LoadCurrentChar(cp_offset, on_failure);
-  if (!char_class->is_negated() &&
-      char_class->ranges()->length() == 1 &&
-      on_failure != NULL) {
-    // This is the simple case where the char class has one range and we want to
-    // fall through if it matches.
-    CharacterRange& range = char_class->ranges()->at(0);
-    assembler_->CheckNotRange(range.from(), range.to(), on_failure);
-  } else {
-    Label on_success;
-    TwoWayCharacterClass(assembler_, char_class, &on_success, on_failure);
-    assembler_->Bind(&on_success);
-  }
+}
+
+
+void RegExpMacroAssemblerRe2k::CheckCharacterLT(uc16 limit,
+                                                Label* on_less) {
+  assembler_->CheckCharacterLT(limit, on_less);
+}
+
+
+void RegExpMacroAssemblerRe2k::CheckCharacterGT(uc16 limit,
+                                                Label* on_greater) {
+  assembler_->CheckCharacterGT(limit, on_greater);
  }



Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h  (original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h  Wed Nov 19 01:37:50 2008
@@ -52,17 +52,13 @@
    virtual void AdvanceRegister(int reg, int by);  // r[reg] += by.
    virtual void SetRegister(int register_index, int to);
    virtual void WriteCurrentPositionToRegister(int reg);
-  virtual void CheckCharacterClass(
-      RegExpCharacterClass* cclass,
-      int cp_offset,
-      Label* on_failure);
-  virtual void CheckCharacters(
-      Vector<const uc16> str,
-      int cp_offset,
-      Label* on_failure);
-  virtual void CheckCurrentPosition(
-      int register_index,
-      Label* on_equal);
+  virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
+  virtual void CheckCharacterLT(uc16 limit, Label* on_less);
+  virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
+  virtual void CheckCharacters(Vector<const uc16> str,
+                               int cp_offset,
+                               Label* on_failure);
+  virtual void CheckCurrentPosition(int register_index, Label* on_equal);
    virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
    virtual void DispatchHalfNibbleMap(uc16 start,
                                       Label* half_nibble_map,

Modified: branches/experimental/regexp2000/src/regexp-macro-assembler.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler.h      (original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler.h       Wed Nov 19 01:37:50 2008
@@ -54,16 +54,11 @@
    virtual void PopRegister(int register_index) = 0;
    virtual void PushRegister(int register_index) = 0;
    virtual void AdvanceRegister(int reg, int by) = 0;  // r[reg] += by.
-  virtual void WriteCurrentPositionToRegister(int reg) = 0;
    virtual void SetRegister(int register_index, int to) = 0;
-  // Looks at the next character from the subject and if it doesn't match
-  // then goto the on_failure label.  End of input never matches.  If the
-  // label is NULL then we should pop a backtrack address off the stack and
-  // go to that.
-  virtual void CheckCharacterClass(
-      RegExpCharacterClass* cclass,
-      int cp_offset,
-      Label* on_failure) = 0;
+  virtual void WriteCurrentPositionToRegister(int reg) = 0;
+  virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
+  virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
+  virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
    // Check the current character for a match with a literal string.  If we
    // fail to match then goto the on_failure label.  End of input always
    // matches.  If the label is NULL then we should pop a backtrack address off

Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Wed Nov 19 01:37:50 2008
@@ -277,7 +277,15 @@

  static bool IsWhiteSpace(uc16 c) {
    switch (c) {
-    case 0x09: case 0x0B: case 0x0C: case 0x20: case 0xA0:
+    case 0x09:
+    case 0x0A:
+    case 0x0B:
+    case 0x0C:
+    case 0x0d:
+    case 0x20:
+    case 0xA0:
+    case 0x2028:
+    case 0x2029:
        return true;
      default:
        return unibrow::Space::Is(c);
@@ -519,15 +527,18 @@

    Handle<String> f1 =
        Factory::NewStringFromAscii(CStrVector("Now is the time"));
-  CHECK(!Re2kInterpreter::Match(array, f1, captures, 0));
+  Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
+  CHECK(!Re2kInterpreter::Match(array, f1_16, captures, 0));

    Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
-  CHECK(Re2kInterpreter::Match(array, f2, captures, 0));
+  Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
+  CHECK(Re2kInterpreter::Match(array, f2_16, captures, 0));
    CHECK_EQ(0, captures[0]);
    CHECK_EQ(2, captures[1]);

    Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));
-  CHECK(Re2kInterpreter::Match(array, f3, captures, 0));
+  Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
+  CHECK(Re2kInterpreter::Match(array, f3_16, captures, 0));
    CHECK_EQ(3, captures[0]);
    CHECK_EQ(5, captures[1]);
  }
@@ -591,27 +602,32 @@

    Handle<String> f1 =
        Factory::NewStringFromAscii(CStrVector("Now is the time"));
-  CHECK(!Re2kInterpreter::Match(array, f1, captures, 0));
+  Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
+  CHECK(!Re2kInterpreter::Match(array, f1_16, captures, 0));

    Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
-  CHECK(Re2kInterpreter::Match(array, f2, captures, 0));
+  Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
+  CHECK(Re2kInterpreter::Match(array, f2_16, captures, 0));
    CHECK_EQ(0, captures[0]);
    CHECK_EQ(2, captures[1]);

    Handle<String> f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery"));
-  CHECK(Re2kInterpreter::Match(array, f3, captures, 0));
+  Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
+  CHECK(Re2kInterpreter::Match(array, f3_16, captures, 0));
    CHECK_EQ(0, captures[0]);
    CHECK_EQ(5, captures[1]);

    Handle<String> f4 =
        Factory::NewStringFromAscii(CStrVector("football buffoonery"));
-  CHECK(Re2kInterpreter::Match(array, f4, captures, 0));
+  Handle<String> f4_16 = RegExpImpl::StringToTwoByte(f4);
+  CHECK(Re2kInterpreter::Match(array, f4_16, captures, 0));
    CHECK_EQ(0, captures[0]);
    CHECK_EQ(14, captures[1]);

    Handle<String> f5 =
        Factory::NewStringFromAscii(CStrVector("walking\nbarefoot"));
-  CHECK(!Re2kInterpreter::Match(array, f5, captures, 0));
+  Handle<String> f5_16 = RegExpImpl::StringToTwoByte(f5);
+  CHECK(!Re2kInterpreter::Match(array, f5_16, captures, 0));
  }


@@ -662,7 +678,8 @@

    Handle<String> f1 =
        Factory::NewStringFromAscii(CStrVector("foobar"));
-  CHECK(Re2kInterpreter::Match(array, f1, captures, 0));
+  Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
+  CHECK(Re2kInterpreter::Match(array, f1_16, captures, 0));
    CHECK_EQ(0, captures[0]);
    CHECK_EQ(3, captures[1]);
    CHECK_EQ(1, captures[2]);
@@ -671,7 +688,8 @@

    Handle<String> f2 =
        Factory::NewStringFromAscii(CStrVector("barfoo"));
-  CHECK(!Re2kInterpreter::Match(array, f2, captures, 0));
+  Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
+  CHECK(!Re2kInterpreter::Match(array, f2_16, captures, 0));
    CHECK_EQ(42, captures[0]);
  }

@@ -770,5 +788,6 @@


  TEST(Graph) {
+  V8::Initialize(NULL);
    Execute("(a|^b|c)", "", true);
  }

Modified: branches/experimental/regexp2000/test/mjsunit/regexp.js
==============================================================================
--- branches/experimental/regexp2000/test/mjsunit/regexp.js     (original)
+++ branches/experimental/regexp2000/test/mjsunit/regexp.js     Wed Nov 19 01:37:50 2008
@@ -89,7 +89,10 @@
  // From ecma_3/RegExp/regress-334158.js
  assertTrue(/\ca/.test( "\x01" ));
  assertFalse(/\ca/.test( "\\ca" ));
-assertTrue(/\c[a/]/.test( "\x1ba/]" ));
+// Passes in KJS, fails in IrregularExpressions.
+// See http://code.google.com/p/v8/issues/detail?id=152
+//assertTrue(/\c[a/]/.test( "\x1ba/]" ));
+

  // Test that we handle \s and \S correctly inside some bizarre
  // character classes.

Modified: branches/experimental/regexp2000/test/mjsunit/unicode-test.js
==============================================================================
--- branches/experimental/regexp2000/test/mjsunit/unicode-test.js      (original)
+++ branches/experimental/regexp2000/test/mjsunit/unicode-test.js       Wed Nov 19 01:37:50 2008
@@ -9134,6 +9134,32 @@
    assertEquals(munged_sizes[i - 1], munged.length, "munged size " + i);
  }

+
+function hex(x) {
+  x &= 15;
+  if (x < 10) {
+    return String.fromCharCode(x + 48);
+  } else {
+    return String.fromCharCode(x + 97 - 10);
+  }
+}
+
+
+function dump_re(re) {
+  var out = "";
+  for (var i = 0; i < re.length; i++) {
+    var c = re.charCodeAt(i);
+    if (c >= 32 && c <= 126) {
+      out += re[i];
+    } else if (c < 256) {
+      out += "\\x" + hex(c >> 4) + hex(c);
+    } else {
+      out += "\\u" + hex(c >> 12) + hex(c >> 8) + hex(c >> 4) + hex(c);
+    }
+  }
+  print ("re = " + out);
+}
+
  var thai_l_thingy = "\u0e44";
  var thai_l_regexp = new RegExp(thai_l_thingy);
  var thai_l_regexp2 = new RegExp("[" + thai_l_thingy + "]");

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to