Author: [EMAIL PROTECTED]
Date: Wed Nov 19 02:16:41 2008
New Revision: 793

Modified:
    branches/experimental/regexp2000/src/heap.cc
    branches/experimental/regexp2000/src/heap.h
    branches/experimental/regexp2000/src/jsregexp.cc
    branches/experimental/regexp2000/src/objects-inl.h
    branches/experimental/regexp2000/src/objects.cc
    branches/experimental/regexp2000/src/objects.h
    branches/experimental/regexp2000/src/parser.cc
    branches/experimental/regexp2000/src/parser.h
    branches/experimental/regexp2000/test/cctest/test-regexp.cc

Log:
Introduced flat string reader abstraction which reads directly from a
flat string independent of the character width.  Replaced the
stream-based input to the regexp parser with a flat string reader and
dropped the 'next' field; now Next() reads directly from the string.


Modified: branches/experimental/regexp2000/src/heap.cc
==============================================================================
--- branches/experimental/regexp2000/src/heap.cc        (original)
+++ branches/experimental/regexp2000/src/heap.cc        Wed Nov 19 02:16:41 2008
@@ -390,8 +390,7 @@
    }
    Counters::objs_since_last_young.Set(0);

-  // Process weak handles post gc.
-  GlobalHandles::PostGarbageCollectionProcessing();
+  PostGarbageCollectionProcessing();

    if (collector == MARK_COMPACTOR) {
      // Register the amount of external allocated memory.
@@ -403,6 +402,14 @@
      ASSERT(!allocation_allowed_);
      global_gc_epilogue_callback_();
    }
+}
+
+
+void Heap::PostGarbageCollectionProcessing() {
+  // Process weak handles post gc.
+  GlobalHandles::PostGarbageCollectionProcessing();
+  // Update flat string readers.
+  FlatStringReader::PostGarbageCollectionProcessing();
  }



Modified: branches/experimental/regexp2000/src/heap.h
==============================================================================
--- branches/experimental/regexp2000/src/heap.h (original)
+++ branches/experimental/regexp2000/src/heap.h Wed Nov 19 02:16:41 2008
@@ -578,6 +578,9 @@
    static void GarbageCollectionPrologue();
    static void GarbageCollectionEpilogue();

+  // Code that should be executed after the garbage collection proper.
+  static void PostGarbageCollectionProcessing();
+
    // Performs garbage collection operation.
    // Returns whether required_space bytes are available after the collection.
    static bool CollectGarbage(int required_space, AllocationSpace space);

Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc    (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc    Wed Nov 19 02:16:41 2008
@@ -203,14 +203,14 @@
    Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern,  
flags);
    bool in_cache = !cached.is_null();
    Handle<Object> result;
-  StringShape shape(*pattern);
    if (in_cache) {
      re->set_data(*cached);
      result = re;
    } else {
-    SafeStringInputBuffer buffer(pattern.location());
+    FlattenString(pattern);
      RegExpParseResult parse_result;
-    if (!ParseRegExp(&buffer, &parse_result)) {
+    FlatStringReader reader(pattern);
+    if (!ParseRegExp(&reader, &parse_result)) {
        // Throw an exception if we fail to parse the pattern.
        ThrowRegExpException(re,
                             pattern,

Modified: branches/experimental/regexp2000/src/objects-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/objects-inl.h  (original)
+++ branches/experimental/regexp2000/src/objects-inl.h  Wed Nov 19 02:16:41 2008
@@ -279,6 +279,16 @@
  }


+uc32 FlatStringReader::Get(int index) {  // Returns the character at index.
+  ASSERT(0 <= index && index < length_);  // index == length_ is past the end.
+  if (is_ascii_) {  // start_ addresses one-byte characters.
+    return static_cast<const byte*>(start_)[index];
+  } else {  // start_ addresses two-byte (uc16) characters.
+    return static_cast<const uc16*>(start_)[index];
+  }
+}
+
+
  bool Object::IsNumber() {
    return IsSmi() || IsHeapNumber();
  }

Modified: branches/experimental/regexp2000/src/objects.cc
==============================================================================
--- branches/experimental/regexp2000/src/objects.cc     (original)
+++ branches/experimental/regexp2000/src/objects.cc     Wed Nov 19 02:16:41 2008
@@ -3501,6 +3501,57 @@
  }


+FlatStringReader* FlatStringReader::top_ = NULL;
+
+
+FlatStringReader::FlatStringReader(Handle<String> str)
+  : str_(str.location()),
+    length_(str->length()),
+    prev_(top_) {
+  top_ = this;
+  RefreshState();
+}
+
+
+FlatStringReader::FlatStringReader(Vector<const char> input)
+  : str_(NULL),
+    is_ascii_(true),
+    length_(input.length()),
+    start_(input.start()),
+    prev_(top_) {
+  top_ = this;
+}
+
+
+FlatStringReader::~FlatStringReader() {
+  ASSERT_EQ(top_, this);
+  top_ = prev_;
+}
+
+
+void FlatStringReader::RefreshState() {
+  if (str_ == NULL) return;
+  Handle<String> str(str_);
+  StringShape shape(*str);
+  ASSERT(str->IsFlat(shape));
+  is_ascii_ = shape.IsAsciiRepresentation();
+  if (is_ascii_) {
+    start_ = str->ToAsciiVector().start();
+  } else {
+    start_ = str->ToUC16Vector().start();
+  }
+}
+
+
+void FlatStringReader::PostGarbageCollectionProcessing() {
+  FlatStringReader* current = top_;
+  while (current != NULL) {
+    current->RefreshState();
+    current = current->prev_;
+  }
+}
+
+
  void StringInputBuffer::Seek(unsigned pos) {
    Reset(pos, input_);
  }

Modified: branches/experimental/regexp2000/src/objects.h
==============================================================================
--- branches/experimental/regexp2000/src/objects.h      (original)
+++ branches/experimental/regexp2000/src/objects.h      Wed Nov 19 02:16:41 2008
@@ -3590,6 +3590,28 @@
  };


+// A flat string reader provides random access to the contents of a
+// string independent of the character width of the string.  The handle
+// must be valid as long as the reader is being used.
+class FlatStringReader BASE_EMBEDDED {
+ public:
+  explicit FlatStringReader(Handle<String> str);
+  explicit FlatStringReader(Vector<const char> input);
+  ~FlatStringReader();
+  void RefreshState();
+  inline uc32 Get(int index);
+  int length() { return length_; }
+  static void PostGarbageCollectionProcessing();
+ private:
+  String** str_;
+  bool is_ascii_;
+  int length_;
+  const void* start_;
+  FlatStringReader* prev_;
+  static FlatStringReader* top_;
+};
+
+
  // Note that StringInputBuffers are not valid across a GC!  To fix this
  // it would have to store a String Handle instead of a String* and
  // AsciiStringReadBlock would have to be modified to use memcpy.

Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc      (original)
+++ branches/experimental/regexp2000/src/parser.cc      Wed Nov 19 02:16:41 2008
@@ -496,7 +496,7 @@

  class RegExpParser {
   public:
-  RegExpParser(unibrow::CharacterStream* in,
+  RegExpParser(FlatStringReader* in,
                 Handle<String>* error,
                 bool multiline_mode);
    RegExpTree* ParsePattern(bool* ok);
@@ -531,36 +531,26 @@
    RegExpTree* ReportError(Vector<const char> message, bool* ok);
    void Advance();
    void Advance(int dist);
-  // Pushes a read character (or potentially some other character) back
-  // on the input stream. After pushing it back, it becomes the character
-  // returned by current(). There is a limited amount of push-back buffer.
-  // A function using PushBack should check that it doesn't push back more
-  // than kMaxPushback characters, and it should not push back more characters
-  // than it has read.
-  void PushBack(uc32 character);
-  bool CanPushBack();
+  void Reset(int pos);

    bool HasCharacterEscapes();

    int captures_started() { return captures_ == NULL ? 0 :  
captures_->length(); }
+  int position() { return next_pos_ - 1; }

    static const uc32 kEndMarker = unibrow::Utf8::kBadChar;
   private:
    uc32 current() { return current_; }
-  uc32 next() { return next_; }
    bool has_more() { return has_more_; }
-  bool has_next() { return has_next_; }
-  unibrow::CharacterStream* in() { return in_; }
+  bool has_next() { return next_pos_ < in()->length(); }
+  uc32 Next();
+  FlatStringReader* in() { return in_; }
    uc32 current_;
-  uc32 next_;
    bool has_more_;
-  bool has_next_;
    bool multiline_mode_;
-  unibrow::CharacterStream* in_;
+  int next_pos_;
+  FlatStringReader* in_;
    Handle<String>* error_;
-  static const int kMaxPushback = 5;
-  int pushback_count_;
-  uc32 pushback_buffer_[kMaxPushback];
    bool has_character_escapes_;
    ZoneList<RegExpCapture*>* captures_;
  };
@@ -3506,63 +3496,53 @@
  // Regular expressions


-RegExpParser::RegExpParser(unibrow::CharacterStream* in,
+RegExpParser::RegExpParser(FlatStringReader* in,
                             Handle<String>* error,
                             bool multiline_mode)
    : current_(kEndMarker),
-    next_(kEndMarker),
      has_more_(true),
-    has_next_(true),
      multiline_mode_(multiline_mode),
+    next_pos_(0),
      in_(in),
      error_(error),
-    pushback_count_(0),
      has_character_escapes_(false),
      captures_(NULL) {
-  Advance(2);
+  Advance(1);
  }


-void RegExpParser::Advance() {
-  current_ = next_;
-  has_more_ = has_next_;
-  if (pushback_count_ > 0) {
-    pushback_count_--;
-    next_ = pushback_buffer_[pushback_count_];
-  } else if (in()->has_more()) {
-    next_ = in()->GetNext();
+uc32 RegExpParser::Next() {
+  if (has_next()) {
+    return in()->Get(next_pos_);
    } else {
-    next_ = kEndMarker;
-    has_next_ = false;
+    return kEndMarker;
    }
  }


-void RegExpParser::Advance(int dist) {
-  for (int i = 0; i < dist; i++)
-    Advance();
-}
-
-
-void RegExpParser::PushBack(uc32 character) {
-  if (has_next_) {
-    ASSERT(pushback_count_ < kMaxPushback);
-    pushback_buffer_[pushback_count_] = next_;
-    pushback_count_++;
+void RegExpParser::Advance() {
+  if (next_pos_ < in()->length()) {
+    current_ = in()->Get(next_pos_);
+    next_pos_++;
+  } else {
+    current_ = kEndMarker;
+    has_more_ = false;
    }
+}

-  next_ = current_;
-  has_next_ = has_more_;

-  current_ = character;
-  has_more_ = true;
+void RegExpParser::Reset(int pos) {  // Rewinds so current() is the char at pos.
+  next_pos_ = pos;
+  // Advance() clears has_more_ at end of input and never sets it again, so a
+  // lookahead that ran off the end must have the flag restored on rewind.
+  has_more_ = (pos < in()->length());
+  Advance();
  }


-bool RegExpParser::CanPushBack() {
-  return (pushback_count_ < kMaxPushback);
+void RegExpParser::Advance(int dist) {
+  for (int i = 0; i < dist; i++)
+    Advance();
  }

+
  // Reports whether the parsed string atoms contain any characters that were
  // escaped in the original pattern. If not, all atoms are proper substrings
  // of the original pattern.
@@ -3662,7 +3642,7 @@
      // Atom ::
      //   \ AtomEscape
      case '\\':
-      switch (next()) {
+      switch (Next()) {
        case kEndMarker:
          ReportError(CStrVector("\\ at end of pattern"), CHECK_OK);
        case 'b':
@@ -3681,7 +3661,7 @@
          // CharacterClassEscape :: one of
          //   d D s S w W
        case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {
-        uc32 c = next();
+        uc32 c = Next();
          Advance(2);
          ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
          CharacterRange::AddClassEscape(c, ranges);
@@ -3703,7 +3683,7 @@
            builder.AddAtom(atom);
            goto has_read_atom;  // Avoid setting has_character_escapes_.
          }
-        uc32 first_digit = next();
+        uc32 first_digit = Next();
          if (first_digit == '8' || first_digit == '9') {
            // Treat as identity escape
            builder.AddCharacter(first_digit);
@@ -3768,7 +3748,7 @@
        }
        default:
          // Identity escape.
-        builder.AddCharacter(next());
+        builder.AddCharacter(Next());
          Advance(2);
          break;
        }
@@ -3861,8 +3841,7 @@

  bool RegExpParser::ParseBackreferenceIndex(int* index_out) {
    ASSERT_EQ('\\', current());
-  ASSERT('1' <= next() && next() <= '9');
-  ASSERT_EQ(0, pushback_count_);
+  ASSERT('1' <= Next() && Next() <= '9');
    // Try to parse a decimal literal that is no greater than the number
    // of previously encountered left capturing parentheses.
    // This is a not according the the ECMAScript specification. According to
@@ -3870,30 +3849,19 @@
    // parentheses in the entire input, even if they are meaningless.
    if (captures_ == NULL)
      return false;
-  int value = next() - '0';
+  int start = position();
+  int value = Next() - '0';
    if (value > captures_->length())
      return false;
-  static const int kMaxChars = kMaxPushback - 2;
-  EmbeddedVector<uc32, kMaxChars> chars_seen;
-  chars_seen[0] = next();
-  int char_count = 1;
    Advance(2);
    while (true) {
      uc32 c = current();
      if (IsDecimalDigit(c)) {
        value = 10 * value + (c - '0');
-      // To avoid reading past the end of the stack-allocated pushback
-      // buffers we only read kMaxChars before giving up.
-      if (value > captures_->length() || char_count > kMaxChars) {
-        // If we give up we have to push the characters we read back
-        // onto the pushback buffer in the reverse order.
-        for (int i = 0; i < char_count; i++) {
-          PushBack(chars_seen[char_count - i - 1]);
-        }
-        PushBack('\\');
+      if (value > captures_->length()) {
+        Reset(start);
          return false;
        }
-      chars_seen[char_count++] = current();
        Advance();
      } else {
        break;
@@ -3992,26 +3960,19 @@


  bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
-  static const int kMaxChars = kMaxPushback;
-  EmbeddedVector<uc32, kMaxChars> chars_seen;
-  ASSERT(length <= kMaxChars);
+  int start = position();
    uc32 val = 0;
    bool done = false;
    for (int i = 0; !done; i++) {
      uc32 c = current();
      int d = HexValue(c);
      if (d < 0) {
-      while (i > 0) {
-        i--;
-        PushBack(chars_seen[i]);
-      }
+      Reset(start);
        return false;
      }
      val = val * 16 + d;
      Advance();
-    if (i < length - 1) {
-      chars_seen[i] = c;
-    } else {
+    if (i == length - 1) {
        done = true;
      }
    }
@@ -4022,7 +3983,7 @@

  uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) {
    ASSERT(current() == '\\');
-  ASSERT(has_next() && !IsSpecialClassEscape(next()));
+  ASSERT(has_next() && !IsSpecialClassEscape(Next()));
    Advance();
    switch (current()) {
      case 'b':
@@ -4091,9 +4052,9 @@
    char type = '(';
    Advance();
    if (current() == '?') {
-    switch (next()) {
+    switch (Next()) {
        case ':': case '=': case '!':
-        type = next();
+        type = Next();
          Advance(2);
          break;
        default:
@@ -4153,10 +4114,10 @@
    ASSERT_EQ(false, *is_char_class);
    uc32 first = current();
    if (first == '\\') {
-    switch (next()) {
+    switch (Next()) {
        case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
          *is_char_class = true;
-        uc32 c = next();
+        uc32 c = Next();
          CharacterRange::AddClassEscape(c, ranges);
          Advance(2);
          return NULL;
@@ -4270,10 +4231,10 @@
  }


-bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult*  
result) {
+bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result) {
    ASSERT(result != NULL);
    // Get multiline flag somehow
-  RegExpParser parser(stream, &result->error, false);
+  RegExpParser parser(input, &result->error, false);
    bool ok = true;
    result->tree = parser.ParsePattern(&ok);
    if (!ok) {

Modified: branches/experimental/regexp2000/src/parser.h
==============================================================================
--- branches/experimental/regexp2000/src/parser.h       (original)
+++ branches/experimental/regexp2000/src/parser.h       Wed Nov 19 02:16:41 2008
@@ -145,7 +145,7 @@
                           v8::Extension* extension);


-bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult*  
result);
+bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result);


  // Support for doing lazy compilation. The script is the script containing full

Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Wed Nov 19 02:16:41 2008
@@ -47,10 +47,10 @@

  static SmartPointer<const char> Parse(const char* input) {
    v8::HandleScope scope;
-  unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
    ZoneScope zone_scope(DELETE_ON_EXIT);
+  FlatStringReader reader(CStrVector(input));
    RegExpParseResult result;
-  CHECK(v8::internal::ParseRegExp(&buffer, &result));
+  CHECK(v8::internal::ParseRegExp(&reader, &result));
    CHECK(result.tree != NULL);
    CHECK(result.error.is_null());
    SmartPointer<const char> output = result.tree->ToString();
@@ -61,8 +61,9 @@
    v8::HandleScope scope;
    unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
    ZoneScope zone_scope(DELETE_ON_EXIT);
+  FlatStringReader reader(CStrVector(input));
    RegExpParseResult result;
-  CHECK(v8::internal::ParseRegExp(&buffer, &result));
+  CHECK(v8::internal::ParseRegExp(&reader, &result));
    CHECK(result.tree != NULL);
    CHECK(result.error.is_null());
    return result.has_character_escapes;
@@ -227,10 +228,10 @@
  static void ExpectError(const char* input,
                          const char* expected) {
    v8::HandleScope scope;
-  unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
    ZoneScope zone_scope(DELETE_ON_EXIT);
+  FlatStringReader reader(CStrVector(input));
    RegExpParseResult result;
-  CHECK_EQ(false, v8::internal::ParseRegExp(&buffer, &result));
+  CHECK_EQ(false, v8::internal::ParseRegExp(&reader, &result));
    CHECK(result.tree == NULL);
    CHECK(!result.error.is_null());
    SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
@@ -343,9 +344,9 @@


  static RegExpNode* Compile(const char* input) {
-  unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
+  FlatStringReader reader(CStrVector(input));
    RegExpParseResult result;
-  if (!v8::internal::ParseRegExp(&buffer, &result))
+  if (!v8::internal::ParseRegExp(&reader, &result))
      return NULL;
    RegExpNode* node = NULL;
    RegExpEngine::Compile(&result, &node, false);

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to