Author: [EMAIL PROTECTED]
Date: Wed Nov 26 23:27:08 2008
New Revision: 853

Modified:
    branches/bleeding_edge/src/flag-definitions.h
    branches/bleeding_edge/src/jsregexp.cc
    branches/bleeding_edge/src/jsregexp.h
    branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
    branches/bleeding_edge/src/regexp-macro-assembler-ia32.h
    branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc
    branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h
    branches/bleeding_edge/src/regexp-macro-assembler.h

Log:
* Complete case independent support in Irregexp.
Review URL: http://codereview.chromium.org/12473

Modified: branches/bleeding_edge/src/flag-definitions.h
==============================================================================
--- branches/bleeding_edge/src/flag-definitions.h       (original)
+++ branches/bleeding_edge/src/flag-definitions.h       Wed Nov 26 23:27:08 2008
@@ -203,8 +203,8 @@
  DEFINE_bool(irregexp, false, "new regular expression code")
  DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
  DEFINE_bool(trace_regexp_bytecodes, false, "trace Irregexp bytecode execution")
-DEFINE_bool(attempt_case_independent, false, "attempt to run Irregexp case independent")
  DEFINE_bool(irregexp_native, false, "use native code Irregexp implementation (IA32 only)")
+DEFINE_bool(disable_jscre, false, "abort if JSCRE is used.  Only useful with --irregexp")

  // Testing flags test/cctest/test-{flags,api,serialization}.cc
  DEFINE_bool(testing_bool_flag, true, "testing_bool_flag")

Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc      (original)
+++ branches/bleeding_edge/src/jsregexp.cc      Wed Nov 26 23:27:08 2008
@@ -243,6 +243,9 @@
                                  &node,
                                  flags.is_ignore_case());
        if (irregexp_data.is_null()) {
+        if (FLAG_disable_jscre) {
+          UNIMPLEMENTED();
+        }
          result = JscrePrepare(re, pattern, flags);
        } else {
          result = IrregexpPrepare(re, pattern, flags, irregexp_data);
@@ -267,6 +270,9 @@
                                  Handle<Object> index) {
    switch (regexp->TypeTag()) {
      case JSRegExp::JSCRE:
+      if (FLAG_disable_jscre) {
+        UNIMPLEMENTED();
+      }
        return JscreExec(regexp, subject, index);
      case JSRegExp::ATOM:
        return AtomExec(regexp, subject, index);
@@ -283,6 +289,9 @@
                                  Handle<String> subject) {
    switch (regexp->TypeTag()) {
      case JSRegExp::JSCRE:
+      if (FLAG_disable_jscre) {
+        UNIMPLEMENTED();
+      }
        return JscreExecGlobal(regexp, subject);
      case JSRegExp::ATOM:
        return AtomExecGlobal(regexp, subject);
@@ -906,7 +915,7 @@
    inline void IncrementRecursionDepth() { recursion_depth_++; }
    inline void DecrementRecursionDepth() { recursion_depth_--; }

-  inline bool is_case_independent() { return is_case_independent_; }
+  inline bool ignore_case() { return ignore_case_; }

   private:
    EndNode* accept_;
@@ -915,7 +924,7 @@
    List<RegExpNode*>* work_list_;
    int recursion_depth_;
    RegExpMacroAssembler* macro_assembler_;
-  bool is_case_independent_;
+  bool ignore_case_;
  };


@@ -925,7 +934,7 @@
      : next_register_(2 * (capture_count + 1)),
        work_list_(NULL),
        recursion_depth_(0),
-      is_case_independent_(ignore_case) {
+      ignore_case_(ignore_case) {
    accept_ = new EndNode(EndNode::ACCEPT);
    backtrack_ = new EndNode(EndNode::BACKTRACK);
  }
@@ -935,9 +944,6 @@
      RegExpMacroAssembler* macro_assembler,
      RegExpNode* start,
      int capture_count) {
-  if (!FLAG_attempt_case_independent && is_case_independent_) {
-    return Handle<FixedArray>::null();
-  }
    macro_assembler_ = macro_assembler;
    List <RegExpNode*> work_list(0);
    work_list_ = &work_list;
@@ -1306,7 +1312,7 @@
      TextElement elm = elms_->at(i);
      if (elm.type == TextElement::ATOM) {
        Vector<const uc16> quarks = elm.data.u_atom->data();
-      if (compiler->is_case_independent()) {
+      if (compiler->ignore_case()) {
          EmitAtomNonLetters(macro_assembler,
                             elm,
                             quarks,
@@ -1324,7 +1330,7 @@
      }
    }
    // Second, handle case independent letter matches if any.
-  if (compiler->is_case_independent()) {
+  if (compiler->ignore_case()) {
      cp_offset = 0;
      for (int i = 0; i < element_count; i++) {
        TextElement elm = elms_->at(i);
@@ -1360,6 +1366,22 @@
  }


+void TextNode::MakeCaseIndependent() {
+  int element_count = elms_->length();
+  for (int i = 0; i < element_count; i++) {
+    TextElement elm = elms_->at(i);
+    if (elm.type == TextElement::CHAR_CLASS) {
+      RegExpCharacterClass* cc = elm.data.u_char_class;
+      ZoneList<CharacterRange>* ranges = cc->ranges();
+      int range_count = ranges->length();
+      for (int i = 0; i < range_count; i++) {
+        ranges->at(i).AddCaseEquivalents(ranges);
+      }
+    }
+  }
+}
+
+
  bool ChoiceNode::Emit(RegExpCompiler* compiler) {
    int choice_count = alternatives_->length();
    RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
@@ -1477,9 +1499,8 @@
    macro->IfRegisterLT(start_reg_, 0, on_success()->label());
    macro->IfRegisterLT(end_reg_, 0, on_success()->label());
    ASSERT_EQ(start_reg_ + 1, end_reg_);
-  if (compiler->is_case_independent()) {
-    macro->CheckNotBackReferenceCaseIndependent(start_reg_,
-                                                on_failure_->label());
+  if (compiler->ignore_case()) {
+    macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
    } else {
      macro->CheckNotBackReference(start_reg_, on_failure_->label());
    }
@@ -2429,6 +2450,9 @@


  void Analysis::VisitText(TextNode* that) {
+  if (ignore_case_) {
+    that->MakeCaseIndependent();
+  }
    EnsureAnalyzed(that->on_success());
    EnsureAnalyzed(that->on_failure());
  }
@@ -2604,7 +2628,7 @@
                                                captured_body,
                                                compiler.backtrack());
    if (node_return != NULL) *node_return = node;
-  Analysis analysis;
+  Analysis analysis(ignore_case);
    analysis.EnsureAnalyzed(node);

    if (!FLAG_irregexp) {

Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h       (original)
+++ branches/bleeding_edge/src/jsregexp.h       Wed Nov 26 23:27:08 2008
@@ -596,6 +596,7 @@
    RegExpNode* on_failure() { return on_failure_; }
    virtual bool Emit(RegExpCompiler* compiler);
    ZoneList<TextElement>* elements() { return elms_; }
+  void MakeCaseIndependent();
   private:
    RegExpNode* on_failure_;
    ZoneList<TextElement>* elms_;
@@ -741,12 +742,19 @@

  class Analysis: public NodeVisitor {
   public:
+  explicit Analysis(bool ignore_case)
+      : ignore_case_(ignore_case) { }
    void EnsureAnalyzed(RegExpNode* node);

  #define DECLARE_VISIT(Type)                                          \
    virtual void Visit##Type(Type##Node* that);
  FOR_EACH_NODE_TYPE(DECLARE_VISIT)
  #undef DECLARE_VISIT
+
+ private:
+  bool ignore_case_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
  };



Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   Wed Nov 26 23:27:08 2008
@@ -212,7 +212,7 @@
  }


-void RegExpMacroAssemblerIA32::CheckNotBackReferenceCaseIndependent(
+void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
      int start_reg, Label* on_no_match) {
    UNIMPLEMENTED();
  }

Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.h    (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.h    Wed Nov 26 23:27:08 2008
@@ -50,8 +50,8 @@
                                 Label* on_failure);
    virtual void CheckCurrentPosition(int register_index, Label* on_equal);
    virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
-  virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
-                                                    Label* on_no_match);
+  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+                                               Label* on_no_match);
    virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
    virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
    virtual void CheckNotCharacterAfterMinusOr(uc16 c,

Modified: branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc       (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc       Wed Nov 26 23:27:08 2008
@@ -199,7 +199,7 @@
  }


-void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceCaseIndependent(
+void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
      int start_reg,
      Label* on_not_equal) {
    assembler_->CheckNotBackReferenceNoCase(start_reg, on_not_equal);

Modified: branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h        (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h        Wed Nov 26 23:27:08 2008
@@ -65,8 +65,8 @@
                                               uc16 mask,
                                               Label* on_not_equal);
    virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
-  virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
-                                                    Label* on_no_match);
+  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+                                               Label* on_no_match);
    virtual void CheckCharacters(Vector<const uc16> str,
                                 int cp_offset,
                                 Label* on_failure);

Modified: branches/bleeding_edge/src/regexp-macro-assembler.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler.h (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler.h Wed Nov 26 23:27:08 2008
@@ -76,8 +76,8 @@
        int register_index,
        Label* on_equal) = 0;
    virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
-  virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
-                                                    Label* on_no_match) = 0;
+  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+                                               Label* on_no_match) = 0;
    // Check the current character for a match with a literal character.  If we
    // fail to match then goto the on_failure label.  End of input always
    // matches.  If the label is NULL then we should pop a backtrack address off

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to