Author: [EMAIL PROTECTED]
Date: Thu Nov 27 02:35:06 2008
New Revision: 856

Modified:
    branches/bleeding_edge/src/jsregexp.cc
    branches/bleeding_edge/src/jsregexp.h
    branches/bleeding_edge/src/parser.cc
    branches/bleeding_edge/src/parser.h
    branches/bleeding_edge/test/cctest/test-regexp.cc

Log:
Fixed some assertion propagation issues and added non-multiline $
propagation.


Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc      (original)
+++ branches/bleeding_edge/src/jsregexp.cc      Thu Nov 27 02:35:06 2008
@@ -218,7 +218,7 @@
      FlattenString(pattern);
      RegExpParseResult parse_result;
      FlatStringReader reader(pattern);
-    if (!ParseRegExp(&reader, &parse_result)) {
+    if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
        // Throw an exception if we fail to parse the pattern.
        ThrowRegExpException(re,
                             pattern,
@@ -241,7 +241,8 @@
        Handle<FixedArray> irregexp_data =
            RegExpEngine::Compile(&parse_result,
                                  &node,
-                                flags.is_ignore_case());
+                                flags.is_ignore_case(),
+                                flags.is_multiline());
        if (irregexp_data.is_null()) {
          if (FLAG_disable_jscre) {
            UNIMPLEMENTED();
@@ -980,7 +981,8 @@
    // TODO(erikcorry): Implement support.
    if (info_.follows_word_interest ||
        info_.follows_newline_interest ||
-      info_.follows_start_interest) {
+      info_.follows_start_interest ||
+      info_.at_end) {
      return false;
    }
    if (label_.is_bound()) {
@@ -1004,7 +1006,8 @@
  bool EndNode::GoTo(RegExpCompiler* compiler) {
    if (info()->follows_word_interest ||
        info()->follows_newline_interest ||
-      info()->follows_start_interest) {
+      info()->follows_start_interest ||
+      info()->at_end) {
      return false;
    }
    if (!label()->is_bound()) {
@@ -1629,14 +1632,21 @@
                  "fontcolor=lightgrey, margin=0.1, fontsize=10, label=\"{",
                  that);
    NodeInfo* info = that->info();
-  stream()->Add("{NI|%i}|{SI|%i}|{WI|%i}",
+  stream()->Add("{NI|%i}|{WI|%i}|{SI|%i}",
                  info->follows_newline_interest,
-                info->follows_start_interest,
-                info->follows_word_interest);
-  stream()->Add("|{DN|%i}|{DS|%i}|{DW|%i}",
+                info->follows_word_interest,
+                info->follows_start_interest);
+  stream()->Add("|{DN|%i}|{DW|%i}|{DS|%i}|{AE|%i}",
                  info->determine_newline,
+                info->determine_word,
                  info->determine_start,
-                info->determine_word);
+                info->at_end);
+  if (info->follows_newline != NodeInfo::UNKNOWN)
+    stream()->Add("|{FN|%i}", info->follows_newline);
+  if (info->follows_word != NodeInfo::UNKNOWN)
+    stream()->Add("|{FW|%i}", info->follows_word);
+  if (info->follows_start != NodeInfo::UNKNOWN)
+    stream()->Add("|{FS|%i}", info->follows_start);
    Label* label = that->label();
    if (label->is_bound())
      stream()->Add("|{@|%x}", label->pos());
@@ -1924,11 +1934,14 @@
      case BOUNDARY: case NON_BOUNDARY:
        info.follows_word_interest = true;
        break;
-    case END_OF_LINE: case END_OF_INPUT:
+    case END_OF_INPUT:
+      info.at_end = true;
+      break;
+    case END_OF_LINE:
        // This is wrong but has the effect of making the compiler abort.
-      info.follows_start_interest = true;
+      info.at_end = true;
    }
-  return on_success->PropagateInterest(&info);
+  return on_success->PropagateForward(&info);
  }


@@ -2216,7 +2229,7 @@
  RegExpNode* RegExpNode::GetSibling(NodeInfo* info) {
    for (int i = 0; i < siblings_.length(); i++) {
      RegExpNode* sibling = siblings_.Get(i);
-    if (sibling->info()->SameInterests(info))
+    if (sibling->info()->HasSameForwardInterests(info))
        return sibling;
    }
    return NULL;
@@ -2225,58 +2238,64 @@

  template <class C>
  static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
-  RegExpNode* sibling = node->GetSibling(info);
+  NodeInfo full_info(*node->info());
+  full_info.AddFromPreceding(info);
+  RegExpNode* sibling = node->GetSibling(&full_info);
    if (sibling != NULL) return sibling;
    node->EnsureSiblings();
    sibling = new C(*node);
-  sibling->info()->AdoptInterests(info);
+  sibling->info()->AddFromPreceding(&full_info);
    node->AddSibling(sibling);
    return sibling;
  }


-RegExpNode* ActionNode::PropagateInterest(NodeInfo* info) {
-  RegExpNode* sibling = GetSibling(info);
+RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
+  NodeInfo full_info(*this->info());
+  full_info.AddFromPreceding(info);
+  RegExpNode* sibling = GetSibling(&full_info);
    if (sibling != NULL) return sibling;
    EnsureSiblings();
    ActionNode* action = new ActionNode(*this);
-  action->info()->AdoptInterests(info);
+  action->info()->AddFromPreceding(&full_info);
    AddSibling(action);
-  action->set_on_success(action->on_success()->PropagateInterest(info));
+  action->set_on_success(action->on_success()->PropagateForward(info));
    return action;
  }


-RegExpNode* ChoiceNode::PropagateInterest(NodeInfo* info) {
-  RegExpNode* sibling = GetSibling(info);
+RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
+  NodeInfo full_info(*this->info());
+  full_info.AddFromPreceding(info);
+  RegExpNode* sibling = GetSibling(&full_info);
    if (sibling != NULL) return sibling;
    EnsureSiblings();
    ChoiceNode* choice = new ChoiceNode(*this);
-  choice->info()->AdoptInterests(info);
+  choice->info()->AddFromPreceding(&full_info);
    AddSibling(choice);
    ZoneList<GuardedAlternative>* old_alternatives = alternatives();
    int count = old_alternatives->length();
    choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
    for (int i = 0; i < count; i++) {
      GuardedAlternative alternative = old_alternatives->at(i);
-    alternative.set_node(alternative.node()->PropagateInterest(info));
+    alternative.set_node(alternative.node()->PropagateForward(info));
      choice->alternatives()->Add(alternative);
    }
    return choice;
  }


-RegExpNode* EndNode::PropagateInterest(NodeInfo* info) {
+RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
    return PropagateToEndpoint(this, info);
  }


-RegExpNode* BackReferenceNode::PropagateInterest(NodeInfo* info) {
+RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
    return PropagateToEndpoint(this, info);
  }


-RegExpNode* TextNode::PropagateInterest(NodeInfo* info) {
+RegExpNode* TextNode::PropagateForward(NodeInfo* info) {
    return PropagateToEndpoint(this, info);
  }

@@ -2455,15 +2474,21 @@
    }
    EnsureAnalyzed(that->on_success());
    EnsureAnalyzed(that->on_failure());
+  NodeInfo* info = that->info();
+  NodeInfo* next_info = that->on_success()->info();
+  // If the following node is interested in what it follows then this
+  // node must determine it.
+  info->determine_newline = next_info->follows_newline_interest;
+  info->determine_word = next_info->follows_word_interest;
+  info->determine_start = next_info->follows_start_interest;
  }


  void Analysis::VisitAction(ActionNode* that) {
-  RegExpNode* next = that->on_success();
-  EnsureAnalyzed(next);
-  that->info()->determine_newline = next->info()->prev_determine_newline();
-  that->info()->determine_word = next->info()->prev_determine_word();
-  that->info()->determine_start = next->info()->prev_determine_start();
+  EnsureAnalyzed(that->on_success());
+  // If the next node is interested in what it follows then this node
+  // has to be interested too so it can pass the information on.
+  that->info()->AddFromFollowing(that->on_success()->info());
  }


@@ -2472,9 +2497,9 @@
    for (int i = 0; i < that->alternatives()->length(); i++) {
      RegExpNode* node = that->alternatives()->at(i).node();
      EnsureAnalyzed(node);
-    info->determine_newline |= node->info()->prev_determine_newline();
-    info->determine_word |= node->info()->prev_determine_word();
-    info->determine_start |= node->info()->prev_determine_start();
+    // Anything the following nodes need to know has to be known by
+    // this node also, so it can pass it on.
+    info->AddFromFollowing(node->info());
    }
    if (!that->table_calculated()) {
      DispatchTableConstructor cons(that->table());
@@ -2607,7 +2632,8 @@

  Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
                                           RegExpNode** node_return,
-                                         bool ignore_case) {
+                                         bool ignore_case,
+                                         bool is_multiline) {
    RegExpCompiler compiler(input->capture_count, ignore_case);
    // Wrap the body of the regexp in capture #0.
    RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,

Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h       (original)
+++ branches/bleeding_edge/src/jsregexp.h       Thu Nov 27 02:35:06 2008
@@ -437,6 +437,10 @@


  struct NodeInfo {
+  enum PrecedingInfo {
+    UNKNOWN = -1, FALSE = 0, TRUE = 1
+  };
+
    NodeInfo()
        : being_analyzed(false),
          been_analyzed(false),
@@ -445,34 +449,59 @@
          determine_start(false),
          follows_word_interest(false),
          follows_newline_interest(false),
-        follows_start_interest(false) { }
-  bool SameInterests(NodeInfo* that) {
-    return (follows_word_interest == that->follows_word_interest)
+        follows_start_interest(false),
+        at_end(false),
+        follows_word(UNKNOWN),
+        follows_newline(UNKNOWN),
+        follows_start(UNKNOWN) { }
+
+  bool HasSameForwardInterests(NodeInfo* that) {
+    return (at_end == that->at_end)
+        && (follows_word_interest == that->follows_word_interest)
          && (follows_newline_interest == that->follows_newline_interest)
          && (follows_start_interest == that->follows_start_interest);
    }
-  void AdoptInterests(NodeInfo* that) {
-    follows_word_interest = that->follows_word_interest;
-    follows_newline_interest = that->follows_newline_interest;
-    follows_start_interest = that->follows_start_interest;
-  }
-  bool prev_determine_word() {
-    return determine_word || follows_word_interest;
-  }
-  bool prev_determine_newline() {
-    return determine_newline || follows_newline_interest;
+
+  // Updates the interests of this node given the interests of the
+  // node preceding it.
+  void AddFromPreceding(NodeInfo* that) {
+    at_end |= that->at_end;
+    follows_word_interest |= that->follows_word_interest;
+    follows_newline_interest |= that->follows_newline_interest;
+    follows_start_interest |= that->follows_start_interest;
    }
-  bool prev_determine_start() {
-    return determine_start || follows_start_interest;
+
+  // Sets the interests of this node to include the interests of the
+  // following node.
+  void AddFromFollowing(NodeInfo* that) {
+    follows_word_interest |= that->follows_word_interest;
+    follows_newline_interest |= that->follows_newline_interest;
+    follows_start_interest |= that->follows_start_interest;
    }
+
    bool being_analyzed: 1;
    bool been_analyzed: 1;
+
+  // These bits are set if this node must propagate forward information
+  // about the last character it consumed (or, in the case of 'start',
+  // if it is at the start of the input).
    bool determine_word: 1;
    bool determine_newline: 1;
    bool determine_start: 1;
+
+  // These bits are set of this node has to know what the preceding
+  // character was.
    bool follows_word_interest: 1;
    bool follows_newline_interest: 1;
    bool follows_start_interest: 1;
+
+  bool at_end: 1;
+
+  // These bits are set if the node can make assumptions about what
+  // the previous character was.
+  PrecedingInfo follows_word: 2;
+  PrecedingInfo follows_newline: 2;
+  PrecedingInfo follows_start: 2;
  };


@@ -511,7 +540,13 @@
    // Until the implementation is complete we will return true for success  
and
    // false for failure.
    virtual bool Emit(RegExpCompiler* compiler) = 0;
-  virtual RegExpNode* PropagateInterest(NodeInfo* info) = 0;
+
+  // Propagates the given interest information forward.  When seeing
+  // \bfoo for instance, the \b is implemented by propagating forward
+  // to the 'foo' string that it should only succeed if its first
+  // character is a letter xor the previous character was a letter.
+  virtual RegExpNode* PropagateForward(NodeInfo* info) = 0;
+
    NodeInfo* info() { return &info_; }
    virtual bool IsBacktrack() { return false; }
    RegExpNode* GetSibling(NodeInfo* info);
@@ -558,7 +593,7 @@
    static ActionNode* EscapeSubmatch(int reg, RegExpNode* on_success);
    virtual void Accept(NodeVisitor* visitor);
    virtual bool Emit(RegExpCompiler* compiler);
-  virtual RegExpNode* PropagateInterest(NodeInfo* info);
+  virtual RegExpNode* PropagateForward(NodeInfo* info);
   private:
    union {
      struct {
@@ -592,7 +627,7 @@
          on_failure_(on_failure),
          elms_(elms) { }
    virtual void Accept(NodeVisitor* visitor);
-  virtual RegExpNode* PropagateInterest(NodeInfo* info);
+  virtual RegExpNode* PropagateForward(NodeInfo* info);
    RegExpNode* on_failure() { return on_failure_; }
    virtual bool Emit(RegExpCompiler* compiler);
    ZoneList<TextElement>* elements() { return elms_; }
@@ -618,7 +653,7 @@
    int start_register() { return start_reg_; }
    int end_register() { return end_reg_; }
    virtual bool Emit(RegExpCompiler* compiler);
-  virtual RegExpNode* PropagateInterest(NodeInfo* info);
+  virtual RegExpNode* PropagateForward(NodeInfo* info);
   private:
    RegExpNode* on_failure_;
    int start_reg_;
@@ -632,7 +667,7 @@
    explicit EndNode(Action action) : action_(action) { }
    virtual void Accept(NodeVisitor* visitor);
    virtual bool Emit(RegExpCompiler* compiler);
-  virtual RegExpNode* PropagateInterest(NodeInfo* info);
+  virtual RegExpNode* PropagateForward(NodeInfo* info);
    virtual bool IsBacktrack() { return action_ == BACKTRACK; }
    virtual bool GoTo(RegExpCompiler* compiler);
   private:
@@ -683,7 +718,7 @@
    DispatchTable* table() { return &table_; }
    RegExpNode* on_failure() { return on_failure_; }
    virtual bool Emit(RegExpCompiler* compiler);
-  virtual RegExpNode* PropagateInterest(NodeInfo* info);
+  virtual RegExpNode* PropagateForward(NodeInfo* info);
    bool table_calculated() { return table_calculated_; }
    void set_table_calculated(bool b) { table_calculated_ = b; }
    bool being_calculated() { return being_calculated_; }
@@ -770,7 +805,8 @@
   public:
    static Handle<FixedArray> Compile(RegExpParseResult* input,
                                      RegExpNode** node_return,
-                                    bool ignore_case);
+                                    bool ignore_case,
+                                    bool multiline);
    static void DotPrint(const char* label, RegExpNode* node);
  };


Modified: branches/bleeding_edge/src/parser.cc
==============================================================================
--- branches/bleeding_edge/src/parser.cc        (original)
+++ branches/bleeding_edge/src/parser.cc        Thu Nov 27 02:35:06 2008
@@ -545,7 +545,7 @@
    bool CaptureAvailable(int index);
    uc32 current_;
    bool has_more_;
-  bool multiline_mode_;
+  bool multiline_;
    int next_pos_;
    FlatStringReader* in_;
    Handle<String>* error_;
@@ -3499,10 +3499,10 @@

  RegExpParser::RegExpParser(FlatStringReader* in,
                             Handle<String>* error,
-                           bool multiline_mode)
+                           bool multiline)
    : current_(kEndMarker),
      has_more_(true),
-    multiline_mode_(multiline_mode),
+    multiline_(multiline),
      next_pos_(0),
      in_(in),
      error_(error),
@@ -3617,16 +3617,16 @@
      case '^': {
        Advance();
        RegExpAssertion::Type type =
-          multiline_mode_ ? RegExpAssertion::START_OF_LINE :
-                            RegExpAssertion::START_OF_INPUT;
+          multiline_ ? RegExpAssertion::START_OF_LINE :
+                       RegExpAssertion::START_OF_INPUT;
        builder.AddAssertion(new RegExpAssertion(type));
        continue;
      }
      case '$': {
        Advance();
        RegExpAssertion::Type type =
-          multiline_mode_ ? RegExpAssertion::END_OF_LINE :
-                            RegExpAssertion::END_OF_INPUT;
+          multiline_ ? RegExpAssertion::END_OF_LINE :
+                       RegExpAssertion::END_OF_INPUT;
        builder.AddAssertion(new RegExpAssertion(type));
        continue;
      }
@@ -4294,10 +4294,11 @@
  }


-bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result) {
+bool ParseRegExp(FlatStringReader* input,
+                 bool multiline,
+                 RegExpParseResult* result) {
    ASSERT(result != NULL);
-  // TODO(plesner): Get multiline flag somehow
-  RegExpParser parser(input, &result->error, false);
+  RegExpParser parser(input, &result->error, multiline);
    bool ok = true;
    result->tree = parser.ParsePattern(&ok);
    if (!ok) {

Modified: branches/bleeding_edge/src/parser.h
==============================================================================
--- branches/bleeding_edge/src/parser.h (original)
+++ branches/bleeding_edge/src/parser.h Thu Nov 27 02:35:06 2008
@@ -145,7 +145,9 @@
                           v8::Extension* extension);


-bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result);
+bool ParseRegExp(FlatStringReader* input,
+                 bool multiline,
+                 RegExpParseResult* result);


  // Support for doing lazy compilation. The script is the script containing  
full

Modified: branches/bleeding_edge/test/cctest/test-regexp.cc
==============================================================================
--- branches/bleeding_edge/test/cctest/test-regexp.cc   (original)
+++ branches/bleeding_edge/test/cctest/test-regexp.cc   Thu Nov 27 02:35:06  
2008
@@ -56,7 +56,7 @@
    ZoneScope zone_scope(DELETE_ON_EXIT);
    FlatStringReader reader(CStrVector(input));
    RegExpParseResult result;
-  CHECK(v8::internal::ParseRegExp(&reader, &result));
+  CHECK(v8::internal::ParseRegExp(&reader, false, &result));
    CHECK(result.tree != NULL);
    CHECK(result.error.is_null());
    SmartPointer<const char> output = result.tree->ToString();
@@ -69,7 +69,7 @@
    ZoneScope zone_scope(DELETE_ON_EXIT);
    FlatStringReader reader(CStrVector(input));
    RegExpParseResult result;
-  CHECK(v8::internal::ParseRegExp(&reader, &result));
+  CHECK(v8::internal::ParseRegExp(&reader, false, &result));
    CHECK(result.tree != NULL);
    CHECK(result.error.is_null());
    return result.has_character_escapes;
@@ -257,7 +257,7 @@
    ZoneScope zone_scope(DELETE_ON_EXIT);
    FlatStringReader reader(CStrVector(input));
    RegExpParseResult result;
-  CHECK_EQ(false, v8::internal::ParseRegExp(&reader, &result));
+  CHECK_EQ(false, v8::internal::ParseRegExp(&reader, false, &result));
    CHECK(result.tree == NULL);
    CHECK(!result.error.is_null());
    SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
@@ -373,23 +373,23 @@
  }


-static RegExpNode* Compile(const char* input) {
+static RegExpNode* Compile(const char* input, bool multiline) {
    FlatStringReader reader(CStrVector(input));
    RegExpParseResult result;
-  if (!v8::internal::ParseRegExp(&reader, &result))
+  if (!v8::internal::ParseRegExp(&reader, multiline, &result))
      return NULL;
    RegExpNode* node = NULL;
-  RegExpEngine::Compile(&result, &node, false);
+  RegExpEngine::Compile(&result, &node, false, multiline);
    return node;
  }


  static void Execute(const char* input,
-                    const char* str,
+                    bool multiline,
                      bool dot_output = false) {
    v8::HandleScope scope;
    ZoneScope zone_scope(DELETE_ON_EXIT);
-  RegExpNode* node = Compile(input);
+  RegExpNode* node = Compile(input, multiline);
    USE(node);
  #ifdef DEBUG
    if (dot_output) {
@@ -400,14 +400,6 @@
  }


-TEST(Execution) {
-  V8::Initialize(NULL);
-  Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
-  Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
-  Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
-}
-
-
  class TestConfig {
   public:
    typedef int Key;
@@ -1043,8 +1035,8 @@
  TEST(SimplePropagation) {
    v8::HandleScope scope;
    ZoneScope zone_scope(DELETE_ON_EXIT);
-  RegExpNode* node = Compile("(a|^b|c)");
-  CHECK(node->info()->determine_start);
+  RegExpNode* node = Compile("(a|^b|c)", false);
+  CHECK(node->info()->follows_start_interest);
  }


@@ -1061,7 +1053,7 @@


  TEST(RangeCanonicalization) {
-  ASSERT((CanonRange(0) & CharacterRange::kStartMarker) != 0);
+  CHECK((CanonRange(0) & CharacterRange::kStartMarker) != 0);
    // Check that we arrive at the same result when using the basic
    // range canonicalization primitives as when using immediate
    // canonicalization.
@@ -1175,5 +1167,5 @@

  TEST(Graph) {
    V8::Initialize(NULL);
-  Execute("\\w+", "", true);
+  Execute("(?:foo|bar$)", false, true);
  }

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to