Author: [EMAIL PROTECTED]
Date: Mon Nov 24 05:21:48 2008
New Revision: 827

Modified:
    branches/experimental/regexp2000/src/assembler-re2k.cc
    branches/experimental/regexp2000/src/assembler-re2k.h
    branches/experimental/regexp2000/src/bytecodes-re2k.h
    branches/experimental/regexp2000/src/flag-definitions.h
    branches/experimental/regexp2000/src/interpreter-re2k.cc
    branches/experimental/regexp2000/src/jsregexp.cc
    branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
    branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
    branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
    branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h
    branches/experimental/regexp2000/src/regexp-macro-assembler.h

Log:
* Match literals in a case independent way.
* When matching constant length sequences, do case independent
   characters first, then case dependent characters, then character
   classes
Review URL: http://codereview.chromium.org/11352

Modified: branches/experimental/regexp2000/src/assembler-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k.cc      (original)
+++ branches/experimental/regexp2000/src/assembler-re2k.cc      Mon Nov 24  
05:21:48 2008
@@ -196,6 +196,25 @@
    EmitOrLink(on_mismatch);
  }

+void Re2kAssembler::OrThenCheckNotCharacter(uc16 c,
+                                            uc16 mask,
+                                            Label* on_mismatch) {
+  Emit(BC_OR_CHECK_NOT_CHAR);
+  Emit16(c);
+  Emit16(mask);
+  EmitOrLink(on_mismatch);
+}
+
+
+void Re2kAssembler::MinusOrThenCheckNotCharacter(uc16 c,
+                                                 uc16 mask,
+                                                 Label* on_mismatch) {
+  Emit(BC_MINUS_OR_CHECK_NOT_CHAR);
+  Emit16(c);
+  Emit16(mask);
+  EmitOrLink(on_mismatch);
+}
+

  void Re2kAssembler::CheckCharacterLT(uc16 limit, Label* on_less) {
    Emit(BC_CHECK_LT);

Modified: branches/experimental/regexp2000/src/assembler-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k.h       (original)
+++ branches/experimental/regexp2000/src/assembler-re2k.h       Mon Nov 24  
05:21:48 2008
@@ -62,6 +62,8 @@
    // Checks current char register against a singleton.
    void CheckCharacter(uc16 c, Label* on_match);
    void CheckNotCharacter(uc16 c, Label* on_mismatch);
+  void OrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);
+  void MinusOrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);

    // Used to check current char register against a range.
    void CheckCharacterLT(uc16 limit, Label* on_less);

Modified: branches/experimental/regexp2000/src/bytecodes-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/bytecodes-re2k.h       (original)
+++ branches/experimental/regexp2000/src/bytecodes-re2k.h       Mon Nov 24  
05:21:48 2008
@@ -52,15 +52,17 @@
  V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32  
addr32                         */ \
  V(CHECK_CHAR,        18, 7) /* check_char uc16  
addr32                       */ \
  V(CHECK_NOT_CHAR,    19, 7) /* check_not_char uc16  
addr32                   */ \
-V(CHECK_LT,          20, 7) /* check_lt uc16  
addr32                         */ \
-V(CHECK_GT,          21, 7) /* check_gr uc16  
addr32                         */ \
-V(CHECK_NOT_BACK_REF, 22, 6) /* check_not_back_ref capture_idx  
addr32       */ \
-V(LOOKUP_MAP1,       23, 11) /* l_map1 start16 bit_map_addr32  
addr32        */ \
-V(LOOKUP_MAP2,       24, 99) /* l_map2 start16  
half_nibble_map_addr32*      */ \
-V(LOOKUP_MAP8,       25, 99) /* l_map8 start16 byte_map  
addr32*             */ \
-V(LOOKUP_HI_MAP8,    26, 99) /* l_himap8 start8 byte_map_addr32  
addr32*     */ \
-V(CHECK_REGISTER_LT, 27, 8) /* check_reg_lt register_index value16  
addr32   */ \
-V(CHECK_REGISTER_GE, 28, 8) /* check_reg_ge register_index value16  
addr32   */ \
+V(OR_CHECK_NOT_CHAR, 20, 9) /* or_check_not_char uc16 uc16  
addr32           */ \
+V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16  
ad...*/ \
+V(CHECK_LT,          22, 7) /* check_lt uc16  
addr32                         */ \
+V(CHECK_GT,          23, 7) /* check_gt uc16  
addr32                         */ \
+V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx  
addr32       */ \
+V(LOOKUP_MAP1,       25, 11) /* l_map1 start16 bit_map_addr32  
addr32        */ \
+V(LOOKUP_MAP2,       26, 99) /* l_map2 start16  
half_nibble_map_addr32*      */ \
+V(LOOKUP_MAP8,       27, 99) /* l_map8 start16 byte_map  
addr32*             */ \
+V(LOOKUP_HI_MAP8,    28, 99) /* l_himap8 start8 byte_map_addr32  
addr32*     */ \
+V(CHECK_REGISTER_LT, 29, 8) /* check_reg_lt register_index value16  
addr32   */ \
+V(CHECK_REGISTER_GE, 30, 8) /* check_reg_ge register_index value16  
addr32   */ \

  #define DECLARE_BYTECODES(name, code, length) \
    static const int BC_##name = code;

Modified: branches/experimental/regexp2000/src/flag-definitions.h
==============================================================================
--- branches/experimental/regexp2000/src/flag-definitions.h     (original)
+++ branches/experimental/regexp2000/src/flag-definitions.h     Mon Nov 24  
05:21:48 2008
@@ -291,6 +291,7 @@

  DEFINE_bool(trace_regexps, false, "trace regexp execution")
  DEFINE_bool(trace_regexp_bytecodes, false, "trace regexp bytecode  
executon")
+DEFINE_bool(attempt_case_independent, false, "attempt to run re2k case  
independent")
  DEFINE_bool(re2k_native, false, "use native code regexp implementation")

  //

Modified: branches/experimental/regexp2000/src/interpreter-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/interpreter-re2k.cc    (original)
+++ branches/experimental/regexp2000/src/interpreter-re2k.cc    Mon Nov 24  
05:21:48 2008
@@ -195,6 +195,25 @@
          }
          break;
        }
+      BYTECODE(OR_CHECK_NOT_CHAR) {
+        int c = Load16(pc + 1);
+        if (c != (current_char | Load16(pc + 3))) {
+          pc = code_base + Load32(pc + 5);
+        } else {
+          pc += BC_OR_CHECK_NOT_CHAR_LENGTH;
+        }
+        break;
+      }
+      BYTECODE(MINUS_OR_CHECK_NOT_CHAR) {
+        int c = Load16(pc + 1);
+        int m = Load16(pc + 3);
+        if (c != ((current_char - m) | m)) {
+          pc = code_base + Load32(pc + 5);
+        } else {
+          pc += BC_MINUS_OR_CHECK_NOT_CHAR_LENGTH;
+        }
+        break;
+      }
        BYTECODE(CHECK_LT) {
          int limit = Load16(pc + 1);
          if (current_char < limit) {

Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc    (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc    Mon Nov 24 05:21:48  
2008
@@ -875,14 +875,13 @@

  class RegExpCompiler {
   public:
-  explicit RegExpCompiler(int capture_count);
+  RegExpCompiler(int capture_count, bool ignore_case);

    int AllocateRegister() { return next_register_++; }

    Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
                                RegExpNode* start,
-                              int capture_count,
-                              bool case_independent);
+                              int capture_count);

    inline void AddWork(RegExpNode* node) { work_list_->Add(node); }

@@ -899,6 +898,8 @@
    inline void IncrementRecursionDepth() { recursion_depth_++; }
    inline void DecrementRecursionDepth() { recursion_depth_--; }

+  inline bool is_case_independent() { return is_case_independent_; }
+
   private:
    EndNode* accept_;
    EndNode* backtrack_;
@@ -906,15 +907,17 @@
    List<RegExpNode*>* work_list_;
    int recursion_depth_;
    RegExpMacroAssembler* macro_assembler_;
+  bool is_case_independent_;
  };


  // Attempts to compile the regexp using a Regexp2000 code generator.   
Returns
  // a fixed array or a null handle depending on whether it succeeded.
-RegExpCompiler::RegExpCompiler(int capture_count)
+RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)
    : next_register_(2 * (capture_count + 1)),
      work_list_(NULL),
-    recursion_depth_(0) {
+    recursion_depth_(0),
+    is_case_independent_(ignore_case) {
    accept_ = new EndNode(EndNode::ACCEPT);
    backtrack_ = new EndNode(EndNode::BACKTRACK);
  }
@@ -923,9 +926,10 @@
  Handle<FixedArray> RegExpCompiler::Assemble(
      RegExpMacroAssembler* macro_assembler,
      RegExpNode* start,
-    int capture_count,
-    bool case_independent) {
-  if (case_independent) return Handle<FixedArray>::null();
+    int capture_count) {
+  if (!FLAG_attempt_case_independent && is_case_independent_) {
+    return Handle<FixedArray>::null();
+  }
    macro_assembler_ = macro_assembler;
    List <RegExpNode*> work_list(0);
    work_list_ = &work_list;
@@ -1110,107 +1114,231 @@
  }


-bool TextNode::Emit(RegExpCompiler* compiler) {
-  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
-  Bind(macro_assembler);
-  int element_count = elms_->length();
-  int cp_offset = 0;
-  for (int i = 0; i < element_count; i++) {
-    TextElement elm = (*elms_)[i];
-    switch (elm.type) {
-      case TextElement::ATOM: {
-        Vector<const uc16> quarks = elm.data.u_atom->data();
-        macro_assembler->CheckCharacters(quarks,
-                                         cp_offset,
-                                         on_failure_->label());
-        cp_offset += quarks.length();
+static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
+static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
+
+
+static inline void EmitAtomNonLetters(
+    RegExpMacroAssembler* macro_assembler,
+    TextElement elm,
+    Vector<const uc16> quarks,
+    Label* on_failure,
+    int cp_offset) {
+  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+  for (int i = quarks.length() - 1; i >= 0; i--) {
+    uc16 c = quarks[i];
+    int length = uncanonicalize.get(c, '\0', chars);
+    if (length <= 1) {
+      macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
+      macro_assembler->CheckNotCharacter(c, on_failure);
+    }
+  }
+}
+
+
+static bool ShortCutEmitCharacterPair(RegExpMacroAssembler*  
macro_assembler,
+                                      uc16 c1,
+                                      uc16 c2,
+                                      Label* on_failure) {
+  uc16 exor = c1 ^ c2;
+  // Check whether exor has only one bit set.
+  if (((exor - 1) & exor) == 0) {
+    // c1 and c2 differ by exactly one bit, so OR-ing it in maps both to c2.
+    // Ecma262UnCanonicalize always gives the highest number last.
+    ASSERT(c2 > c1);
+    macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);
+    return true;
+  } else {
+    ASSERT(c2 > c1);
+    uc16 diff = c2 - c1;
+    if (((diff - 1) & diff) == 0 && c1 >= diff) {
+      // If the characters differ by 2^n but don't differ by one bit then
+      // subtract the difference from the found character, then do the or
+      // trick.  We avoid the theoretical case where negative numbers are
+      // involved in order to simplify code generation.
+      macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
+                                                     diff,
+                                                     on_failure);
+      return true;
+    }
+  }
+  return false;
+}
+
+
+static inline void EmitAtomLetters(
+    RegExpMacroAssembler* macro_assembler,
+    TextElement elm,
+    Vector<const uc16> quarks,
+    Label* on_failure,
+    int cp_offset) {
+  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+  for (int i = quarks.length() - 1; i >= 0; i--) {
+    uc16 c = quarks[i];
+    int length = uncanonicalize.get(c, '\0', chars);
+    if (length <= 1) continue;
+    macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
+    Label ok;
+    ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
+    switch (length) {
+      case 2: {
+        if (ShortCutEmitCharacterPair(macro_assembler,
+                                      chars[0],
+                                      chars[1],
+                                      on_failure)) {
+          ok.Unuse();
+        } else {
+          macro_assembler->CheckCharacter(chars[0], &ok);
+          macro_assembler->CheckNotCharacter(chars[1], on_failure);
+          macro_assembler->Bind(&ok);
+        }
          break;
        }
-      case TextElement::CHAR_CLASS: {
-        RegExpCharacterClass* cc = elm.data.u_char_class;
-        macro_assembler->LoadCurrentCharacter(cp_offset,  
on_failure_->label());
-        cp_offset++;
+      case 4:
+        macro_assembler->CheckCharacter(chars[3], &ok);
+        // Fall through!
+      case 3:
+        macro_assembler->CheckCharacter(chars[0], &ok);
+        macro_assembler->CheckCharacter(chars[1], &ok);
+        macro_assembler->CheckNotCharacter(chars[2], on_failure);
+        macro_assembler->Bind(&ok);
+        break;
+      default:
+        UNREACHABLE();
+        break;
+    }
+  }
+}

-        ZoneList<CharacterRange>* ranges = cc->ranges();

-        Label success;
+static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
+                          RegExpCharacterClass* cc,
+                          int cp_offset,
+                          Label* on_failure) {
+  macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
+  cp_offset++;

-        Label *char_is_in_class =
-            cc->is_negated() ? on_failure_->label() : &success;
+  ZoneList<CharacterRange>* ranges = cc->ranges();

-        int range_count = ranges->length();
+  Label success;

-        if (range_count == 0) {
-          if (!cc->is_negated()) {
-            on_failure()->GoTo(compiler);
-          }
-          break;
-        }
+  Label *char_is_in_class =
+      cc->is_negated() ? on_failure : &success;

-        for (int i = 0; i < range_count - 1; i++) {
-          CharacterRange& range = (*ranges)[i];
-          Label next_range;
-          uc16 from = range.from();
-          uc16 to = range.to();
-          if (to == from) {
-            macro_assembler->CheckCharacter(to, char_is_in_class);
-          } else {
-            if (from != 0) {
-              macro_assembler->CheckCharacterLT(from, &next_range);
-            }
-            if (to != 0xffff) {
-              macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
-            } else {
-              macro_assembler->GoTo(char_is_in_class);
-            }
-          }
-          macro_assembler->Bind(&next_range);
-        }
+  int range_count = ranges->length();

-        if (range_count != 0) {
-          CharacterRange& range = (*ranges)[range_count - 1];
-          uc16 from = range.from();
-          uc16 to = range.to();
-
-          if (to == from) {
-            if (cc->is_negated()) {
-              macro_assembler->CheckCharacter(to, on_failure_->label());
-            } else {
-              macro_assembler->CheckNotCharacter(to, on_failure_->label());
-            }
-          } else {
-            if (from != 0) {
-              if (!cc->is_negated()) {
-                macro_assembler->CheckCharacterLT(from,  
on_failure_->label());
-              } else {
-                macro_assembler->CheckCharacterLT(from, &success);
-              }
-            }
-            if (to != 0xffff) {
-              if (!cc->is_negated()) {
-                macro_assembler->CheckCharacterGT(to,  
on_failure_->label());
-              } else {
-                macro_assembler->CheckCharacterLT(to + 1,  
on_failure_->label());
-              }
-            } else {
-              if (cc->is_negated()) {
-                macro_assembler->GoTo(on_failure_->label());
-              }
-            }
-          }
-        } else if (cc->is_negated()) {
-          macro_assembler->GoTo(on_failure_->label());
-        }
+  if (range_count == 0) {
+    if (!cc->is_negated()) {
+      macro_assembler->GoTo(on_failure);
+    }
+    return;
+  }

-        macro_assembler->Bind(&success);
+  for (int i = 0; i < range_count - 1; i++) {
+    CharacterRange& range = ranges->at(i);
+    Label next_range;
+    uc16 from = range.from();
+    uc16 to = range.to();
+    if (to == from) {
+      macro_assembler->CheckCharacter(to, char_is_in_class);
+    } else {
+      if (from != 0) {
+        macro_assembler->CheckCharacterLT(from, &next_range);
+      }
+      if (to != 0xffff) {
+        macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
+      } else {
+        macro_assembler->GoTo(char_is_in_class);
+      }
+    }
+    macro_assembler->Bind(&next_range);
+  }

-        break;
+  CharacterRange& range = ranges->at(range_count - 1);
+  uc16 from = range.from();
+  uc16 to = range.to();
+
+  if (to == from) {
+    if (cc->is_negated()) {
+      macro_assembler->CheckCharacter(to, on_failure);
+    } else {
+      macro_assembler->CheckNotCharacter(to, on_failure);
+    }
+  } else {
+    if (from != 0) {
+      if (!cc->is_negated()) {
+        macro_assembler->CheckCharacterLT(from, on_failure);
+      } else {
+        macro_assembler->CheckCharacterLT(from, &success);
        }
-      default:
-        UNREACHABLE();
-        return false;
+    }
+    if (to != 0xffff) {
+      if (!cc->is_negated()) {
+        macro_assembler->CheckCharacterGT(to, on_failure);
+      } else {
+        macro_assembler->CheckCharacterLT(to + 1, on_failure);
+      }
+    } else {
+      if (cc->is_negated()) {
+        macro_assembler->GoTo(on_failure);
+      }
+    }
+  }
+  macro_assembler->Bind(&success);
+}
+
+
+
+bool TextNode::Emit(RegExpCompiler* compiler) {
+  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+  Bind(macro_assembler);
+  int element_count = elms_->length();
+  int cp_offset = 0;
+  // First, handle straight character matches.
+  for (int i = 0; i < element_count; i++) {
+    TextElement elm = elms_->at(i);
+    if (elm.type == TextElement::ATOM) {
+      Vector<const uc16> quarks = elm.data.u_atom->data();
+      if (!compiler->is_case_independent()) {
+        macro_assembler->CheckCharacters(quarks,
+                                         cp_offset,
+                                         on_failure_->label());
+      } else {
+        EmitAtomNonLetters(macro_assembler, elm, quarks,  
on_failure_->label(), cp_offset);
+      }
+      cp_offset += quarks.length();
+    } else {
+      ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
+      cp_offset++;
      }
    }
+  // Second, handle case independent letter matches if any.
+  if (compiler->is_case_independent()) {
+    cp_offset = 0;
+    for (int i = 0; i < element_count; i++) {
+      TextElement elm = elms_->at(i);
+      if (elm.type == TextElement::ATOM) {
+        Vector<const uc16> quarks = elm.data.u_atom->data();
+        EmitAtomLetters(macro_assembler, elm, quarks,  
on_failure_->label(), cp_offset);
+        cp_offset += quarks.length();
+      } else {
+        cp_offset++;
+      }
+    }
+  }
+  // If the fast character matches passed then do the character classes.
+  cp_offset = 0;
+  for (int i = 0; i < element_count; i++) {
+    TextElement elm = elms_->at(i);
+    if (elm.type == TextElement::CHAR_CLASS) {
+      RegExpCharacterClass* cc = elm.data.u_char_class;
+      EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label());
+      cp_offset ++;
+    } else {
+      cp_offset += elm.data.u_atom->data().length();
+    }
+  }
+
    compiler->AddWork(on_failure_);
    macro_assembler->AdvanceCurrentPosition(cp_offset);
    return on_success()->GoTo(compiler);
@@ -1225,14 +1353,14 @@
    // is to use the Dispatch table to try only the relevant ones.
    int i;
    for (i = 0; i < choice_count - 1; i++) {
-    GuardedAlternative alternative = (*alternatives_)[i];
+    GuardedAlternative alternative = alternatives_->at(i);
      Label after;
      Label after_no_pop_cp;
      ZoneList<Guard*>* guards = alternative.guards();
      if (guards != NULL) {
        int guard_count = guards->length();
        for (int j = 0; j < guard_count; j++) {
-        GenerateGuard(macro_assembler, (*guards)[j], &after_no_pop_cp);
+        GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp);
        }
      }
      macro_assembler->PushCurrentPosition();
@@ -1246,12 +1374,12 @@
      macro_assembler->PopCurrentPosition();
      macro_assembler->Bind(&after_no_pop_cp);
    }
-  GuardedAlternative alternative = (*alternatives_)[i];
+  GuardedAlternative alternative = alternatives_->at(i);
    ZoneList<Guard*>* guards = alternative.guards();
    if (guards != NULL) {
      int guard_count = guards->length();
      for (int j = 0; j < guard_count; j++) {
-      GenerateGuard(macro_assembler, (*guards)[j], on_failure_->label());
+      GenerateGuard(macro_assembler, guards->at(j), on_failure_->label());
      }
    }
    if (!on_failure_->IsBacktrack()) {
@@ -1932,10 +2060,6 @@
  }


-static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
-static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
-
-
  void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
    unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    if (IsSingleton()) {
@@ -2412,7 +2536,7 @@
  Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
                                           RegExpNode** node_return,
                                           bool ignore_case) {
-  RegExpCompiler compiler(input->capture_count);
+  RegExpCompiler compiler(input->capture_count, ignore_case);
    // Wrap the body of the regexp in capture #0.
    RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
                                                      0,
@@ -2445,8 +2569,7 @@
                                               ignore_case);
      return compiler.Assemble(&macro_assembler,
                               node,
-                             input->capture_count,
-                             ignore_case);
+                             input->capture_count);
    }
  #endif
    byte codes[1024];
@@ -2454,8 +2577,7 @@
    RegExpMacroAssemblerRe2k macro_assembler(&assembler);
    return compiler.Assemble(&macro_assembler,
                             node,
-                           input->capture_count,
-                           ignore_case);
+                           input->capture_count);
  }



Modified:  
branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc  
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc Mon  
Nov 24 05:21:48 2008
@@ -245,6 +245,27 @@
  }


+void RegExpMacroAssemblerIA32::CheckNotCharacterAfterOr(uc16 c,
+                                                        uc16 mask,
+                                                        Label*  
on_not_equal) {
+  __ mov(eax, Operand(edx));
+  __ or_(eax, mask);
+  __ cmp(eax, c);
+  BranchOrBacktrack(not_equal, on_not_equal);
+}
+
+
+void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusOr(
+    uc16 c,
+    uc16 mask,
+    Label* on_not_equal) {
+  __ lea(eax, Operand(edx, -mask));
+  __ or_(eax, mask);
+  __ cmp(eax, c);
+  BranchOrBacktrack(not_equal, on_not_equal);
+}
+
+
  void RegExpMacroAssemblerIA32::DispatchHalfNibbleMap(
      uc16 start,
      Label* half_nibble_map,

Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h   
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h  Mon  
Nov 24 05:21:48 2008
@@ -54,6 +54,10 @@
    virtual void CheckCurrentPosition(int register_index, Label* on_equal);
    virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
    virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
+  virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label*  
on_not_equal);
+  virtual void CheckNotCharacterAfterMinusOr(uc16 c,
+                                             uc16 mask,
+                                             Label* on_not_equal);
    virtual void DispatchByteMap(uc16 start,
                                 Label* byte_map,
                                 const Vector<Label*>& destinations);

Modified:  
branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc  
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc Mon  
Nov 24 05:21:48 2008
@@ -174,6 +174,21 @@
  }


+void RegExpMacroAssemblerRe2k::CheckNotCharacterAfterOr(uc16 c,
+                                                        uc16 mask,
+                                                        Label*  
on_not_equal) {
+  assembler_->OrThenCheckNotCharacter(c, mask, on_not_equal);
+}
+
+
+void RegExpMacroAssemblerRe2k::CheckNotCharacterAfterMinusOr(
+    uc16 c,
+    uc16 mask,
+    Label* on_not_equal) {
+  assembler_->MinusOrThenCheckNotCharacter(c, mask, on_not_equal);
+}
+
+
  void RegExpMacroAssemblerRe2k::CheckNotBackReference(int start_reg,
                                                       Label* on_not_equal) {
    assembler_->CheckNotBackReference(start_reg, on_not_equal);

Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h   
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h  Mon  
Nov 24 05:21:48 2008
@@ -60,6 +60,10 @@
    virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
    virtual void CheckCharacter(uc16 c, Label* on_equal);
    virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
+  virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label*  
on_not_equal);
+  virtual void CheckNotCharacterAfterMinusOr(uc16 c,
+                                             uc16 mask,
+                                             Label* on_not_equal);
    virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
    virtual void CheckCharacters(Vector<const uc16> str,
                                 int cp_offset,

Modified: branches/experimental/regexp2000/src/regexp-macro-assembler.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler.h       
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler.h       Mon Nov 
 
24 05:21:48 2008
@@ -61,6 +61,10 @@
    virtual void CheckCharacter(uc16 c, Label* on_equal) = 0;
    virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
    virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
+  // Check the current character for a match with a literal string.  If we
+  // fail to match then goto the on_failure label.  End of input always
+  // matches.  If the label is NULL then we should pop a backtrack address  
off
+  // the stack and go to that.
    virtual void CheckCharacters(
        Vector<const uc16> str,
        int cp_offset,
@@ -72,11 +76,21 @@
        int register_index,
        Label* on_equal) = 0;
    virtual void CheckNotBackReference(int start_reg, Label* on_no_match) =  
0;
-  // Check the current character for a match with a literal string.  If we
+  // Check the current character for a match with a literal character.  If  
we
    // fail to match then goto the on_failure label.  End of input always
    // matches.  If the label is NULL then we should pop a backtrack address  
off
    // the stack and go to that.
    virtual void CheckNotCharacter(uc16 c, Label* on_not_equal) = 0;
+  // Bitwise or the current character with the given constant and then
+  // check for a match with c.
+  virtual void CheckNotCharacterAfterOr(uc16 c,
+                                        uc16 or_with,
+                                        Label* on_not_equal) = 0;
+  // Subtract a constant from the current character, then or with the given
+  // constant and then check for a match with c.
+  virtual void CheckNotCharacterAfterMinusOr(uc16 c,
+                                             uc16 minus_then_or_with,
+                                             Label* on_not_equal) = 0;
    // Dispatch after looking the current character up in a byte map.  The
    // destinations vector has up to 256 labels.
    virtual void DispatchByteMap(

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to