Revision: 18073
Author:   [email protected]
Date:     Tue Nov 26 13:37:30 2013 UTC
Log:      Experimental scanner += API which takes Handle<String>.

[email protected]
BUG=

Review URL: https://codereview.chromium.org/88203002
http://code.google.com/p/v8/source/detail?r=18073

Added:
 /branches/experimental/parser/src/lexer/experimental-scanner.cc
Modified:
 /branches/experimental/parser/src/lexer/experimental-scanner.h
 /branches/experimental/parser/src/lexer/lexer-shell.cc
 /branches/experimental/parser/tools/gyp/v8.gyp
 /branches/experimental/parser/tools/lexer_generator/code_generator.jinja

=======================================
--- /dev/null
+++ /branches/experimental/parser/src/lexer/experimental-scanner.cc Tue Nov 26 13:37:30 2013 UTC
@@ -0,0 +1,66 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "experimental-scanner.h"
+
+namespace v8 {
+namespace internal {
+
+std::set<ScannerBase*>* ScannerBase::scanners_ = NULL;  // Lazily allocated.
+
+void ScannerBase::UpdateBuffersAfterGC(v8::Isolate*, GCType, GCCallbackFlags) {
+  // GC epilogue hook: let every live scanner re-derive its buffer pointers.
+  if (scanners_ == NULL) return;
+  typedef std::set<ScannerBase*>::const_iterator ScannerIterator;
+  for (ScannerIterator i = scanners_->begin(); i != scanners_->end(); ++i)
+    (*i)->SetBufferBasedOnHandle();
+}
+
+
+template<>
+const uint8_t* ExperimentalScanner<uint8_t>::GetNewBufferBasedOnHandle() const {
+  // Current address of the source string's one-byte characters.
+  return source_handle_->GetFlatContent().ToOneByteVector().start();
+}
+
+
+template<>
+const uint16_t* ExperimentalScanner<uint16_t>::GetNewBufferBasedOnHandle()
+    const {
+  // Current address of the source string's two-byte (UC16) characters.
+  return source_handle_->GetFlatContent().ToUC16Vector().start();
+}
+
+
+template<>
+const int8_t* ExperimentalScanner<int8_t>::GetNewBufferBasedOnHandle() const {
+  String::FlatContent flat = source_handle_->GetFlatContent();  // One-byte.
+  return reinterpret_cast<const int8_t*>(flat.ToOneByteVector().start());
+}
+
+}
+}
=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.h Mon Nov 25 14:19:39 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.h Tue Nov 26 13:37:30 2013 UTC
@@ -28,6 +28,8 @@
 #ifndef V8_LEXER_EXPERIMENTAL_SCANNER_H
 #define V8_LEXER_EXPERIMENTAL_SCANNER_H

+#include <set>
+
 #include "compiler.h"
 #include "isolate.h"
 #include "scanner.h"  // UnicodeCache.
@@ -59,14 +61,29 @@
   };

   explicit ScannerBase(Isolate* isolate)
-    : unicode_cache_(isolate->unicode_cache()),
+    : isolate_(isolate),
+      unicode_cache_(isolate->unicode_cache()),
       has_line_terminator_before_next_(true),
       harmony_numeric_literals_(false),
       harmony_modules_(false),
       harmony_scoping_(false) {
+    if (!scanners_) {
+      scanners_ = new std::set<ScannerBase*>();
+      isolate->heap()->AddGCEpilogueCallback(&ScannerBase::UpdateBuffersAfterGC,
+                                             kGCTypeAll, false);
+    }
+    scanners_->insert(this);
   }

-  virtual ~ScannerBase() { }
+  virtual ~ScannerBase() {
+    // Unregister; the last scanner out drops the GC hook and the registry.
+    scanners_->erase(this);
+    if (!scanners_->empty()) return;
+    isolate_->heap()->RemoveGCEpilogueCallback(
+        &ScannerBase::UpdateBuffersAfterGC);
+    delete scanners_;
+    scanners_ = NULL;
+  }

   // Returns the next token and advances input.
   Token::Value Next() {
@@ -182,6 +199,10 @@

   virtual void Scan() = 0;
   virtual uc32 ScanHexNumber(int length) = 0;
+  virtual void SetBufferBasedOnHandle() = 0;
+
+  static void UpdateBuffersAfterGC(v8::Isolate*, GCType, GCCallbackFlags);
+

   bool ValidIdentifierPart() {
       return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));
@@ -191,6 +212,7 @@
     return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));
   }

+  Isolate* isolate_;
   UnicodeCache* unicode_cache_;

   bool has_line_terminator_before_next_;
@@ -201,6 +223,9 @@
   bool harmony_numeric_literals_;
   bool harmony_modules_;
   bool harmony_scoping_;
+
+ private:
+  static std::set<ScannerBase*>* scanners_;
 };


@@ -208,44 +233,63 @@
 class ExperimentalScanner : public ScannerBase {
  public:
   explicit ExperimentalScanner(
-      YYCTYPE* source,
-      YYCTYPE* source_end,
-      Isolate* isolate);
+      Handle<String> source,
+      Isolate* isolate)
+      : ScannerBase(isolate),
+        source_handle_(source),
+        buffer_(NULL),
+        buffer_end_(NULL),
+        start_(NULL),
+        cursor_(NULL),
+        marker_(NULL) {
+    ASSERT(source->IsFlat());
+    SetBufferBasedOnHandle();
+    Scan();
+  }

-  virtual ~ExperimentalScanner();
+  virtual ~ExperimentalScanner() { }

   virtual void Scan();
   virtual uc32 ScanHexNumber(int length);

+  virtual void SetBufferBasedOnHandle() {
+    // Raw pointers into the heap string are only valid until the next GC;
+    // this is re-run after every GC (see UpdateBuffersAfterGC) to rebase them.
+    DisallowHeapAllocation no_gc;
+    const YYCTYPE* new_buffer = GetNewBufferBasedOnHandle();
+    if (new_buffer == buffer_) return;  // Same address: nothing to rebase.
+    // Keep each position's offset from the old base across the move.
+    const int start_offset = static_cast<int>(start_ - buffer_);
+    const int cursor_offset = static_cast<int>(cursor_ - buffer_);
+    const int marker_offset = static_cast<int>(marker_ - buffer_);
+    buffer_ = new_buffer;
+    buffer_end_ = buffer_ + source_handle_->length();
+    start_ = buffer_ + start_offset;
+    cursor_ = buffer_ + cursor_offset;
+    marker_ = buffer_ + marker_offset;
+  }
+
+  const YYCTYPE* GetNewBufferBasedOnHandle() const;
+
  private:
+  Handle<String> source_handle_;
   YYCTYPE yych;
-  YYCTYPE* buffer_;
-  YYCTYPE* buffer_end_;
-  YYCTYPE* start_;
-  YYCTYPE* cursor_;
-  YYCTYPE* marker_;
+  const YYCTYPE* buffer_;
+  const YYCTYPE* buffer_end_;
+  const YYCTYPE* start_;
+  const YYCTYPE* cursor_;
+  const YYCTYPE* marker_;
 };


-template<typename YYCTYPE>
-ExperimentalScanner<YYCTYPE>::ExperimentalScanner(
-    YYCTYPE* source,
-    YYCTYPE* source_end,
-    Isolate* isolate)
-    : ScannerBase(isolate),
-      buffer_(source),
-      buffer_end_(source_end),
-      start_(source),
-      cursor_(source),
-      marker_(source) {
-  Scan();
-}
+template<>
+void ExperimentalScanner<uint8_t>::Scan();

+template<>
+void ExperimentalScanner<uint16_t>::Scan();

-template<typename YYCTYPE>
-ExperimentalScanner<YYCTYPE>::~ExperimentalScanner() {
-  delete[] buffer_;
-}
+template<>
+void ExperimentalScanner<int8_t>::Scan();


 template<typename YYCTYPE>
@@ -254,7 +298,7 @@
   // FIXME: we never end up in here if only a subset of the 4 chars are valid
   // hex digits -> handle the case where they're not.
   uc32 x = 0;
-  for (YYCTYPE* s = cursor_ - length; s != cursor_; ++s) {
+  for (const YYCTYPE* s = cursor_ - length; s != cursor_; ++s) {
     int d = HexValue(*s);
     if (d < 0) {
       return -1;
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Fri Nov 22 14:54:41 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Tue Nov 26 13:37:30 2013 UTC
@@ -211,7 +211,7 @@


 template<typename YYCTYPE>
-TimeDelta RunExperimentalScanner(const char* fname,
+TimeDelta RunExperimentalScanner(Handle<String> source,
                                  Isolate* isolate,
                                  Encoding encoding,
                                  bool dump_tokens,
@@ -219,13 +219,8 @@
                                  int repeat,
                                  HarmonySettings harmony_settings) {
   ElapsedTimer timer;
-  byte* buffer_end = 0;
-  YYCTYPE* buffer = reinterpret_cast<YYCTYPE*>(
-      ReadFile(fname, &buffer_end, repeat, encoding == UTF8TO16));
-
   timer.Start();
-  ExperimentalScanner<YYCTYPE> scanner(
-      buffer, reinterpret_cast<YYCTYPE*>(buffer_end), isolate);
+  ExperimentalScanner<YYCTYPE> scanner(source, isolate);
   scanner.SetHarmonyNumericLiterals(harmony_settings.numeric_literals);
   scanner.SetHarmonyModules(harmony_settings.modules);
   scanner.SetHarmonyScoping(harmony_settings.scoping);
@@ -278,27 +273,34 @@
         &baseline_tokens, repeat, harmony_settings);
   }
   if (run_experimental) {
+    Handle<String> source;
+    byte* buffer_end = 0;
+    const byte* buffer = ReadFile(fname, &buffer_end, repeat,
+                                  encoding == UTF8TO16);
     switch (encoding) {
       case UTF8:
-        experimental_time = RunExperimentalScanner<int8_t>(
-            fname, isolate, encoding, print_tokens || check_tokens,
-            &experimental_tokens, repeat, harmony_settings);
-        break;
       case LATIN1:
+        source = isolate->factory()->NewStringFromAscii(
+            Vector<const char>(reinterpret_cast<const char*>(buffer),
+                               buffer_end - buffer));
         experimental_time = RunExperimentalScanner<uint8_t>(
-            fname, isolate, encoding, print_tokens || check_tokens,
+            source, isolate, encoding, print_tokens || check_tokens,
             &experimental_tokens, repeat, harmony_settings);
         break;
       case UTF16:
+      case UTF8TO16: {
+        const uc16* buffer_16 = reinterpret_cast<const uc16*>(buffer);
+        const uc16* buffer_end_16 = reinterpret_cast<const uc16*>(buffer_end);
+        source = isolate->factory()->NewStringFromTwoByte(
+            Vector<const uc16>(buffer_16, buffer_end_16 - buffer_16));
+        // If the string was just an expanded one byte string, V8 detects it
+        // and doesn't store it as two byte.
+        CHECK(source->IsTwoByteRepresentation());
         experimental_time = RunExperimentalScanner<uint16_t>(
-            fname, isolate, encoding, print_tokens || check_tokens,
+            source, isolate, encoding, print_tokens || check_tokens,
             &experimental_tokens, repeat, harmony_settings);
         break;
-      case UTF8TO16:
-        experimental_time = RunExperimentalScanner<uint16_t>(
-            fname, isolate, encoding, print_tokens || check_tokens,
-            &experimental_tokens, repeat, harmony_settings);
-        break;
+      }
       default:
         printf("Encoding not supported by the experimental scanner\n");
         exit(1);
@@ -398,14 +400,20 @@
     ASSERT(!context.IsEmpty());
     {
       v8::Context::Scope scope(context);
-      Isolate* isolate = Isolate::Current();
+      Isolate* internal_isolate = Isolate::Current();
       double baseline_total = 0, experimental_total = 0;
       for (size_t i = 0; i < fnames.size(); i++) {
         std::pair<TimeDelta, TimeDelta> times;
         check_tokens = check_tokens && run_baseline && run_experimental;
-        times = ProcessFile(fnames[i].c_str(), encoding, isolate, run_baseline,
-                            run_experimental, print_tokens, check_tokens,
-                            break_after_illegal, repeat,
+        times = ProcessFile(fnames[i].c_str(),
+                            encoding,
+                            internal_isolate,
+                            run_baseline,
+                            run_experimental,
+                            print_tokens,
+                            check_tokens,
+                            break_after_illegal,
+                            repeat,
                             harmony_settings);
         baseline_total += times.first.InMillisecondsF();
         experimental_total += times.second.InMillisecondsF();
=======================================
--- /branches/experimental/parser/tools/gyp/v8.gyp Mon Nov 25 15:21:06 2013 UTC
+++ /branches/experimental/parser/tools/gyp/v8.gyp Tue Nov 26 13:37:30 2013 UTC
@@ -470,6 +470,7 @@
         '../../src/jsregexp.cc',
         '../../src/jsregexp.h',
         '../../src/lazy-instance.h',
+        '../../src/lexer/experimental-scanner.cc',
         '../../src/lexer/experimental-scanner.h',
         '../../src/list-inl.h',
         '../../src/list.h',
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Mon Nov 25 15:42:28 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Tue Nov 26 13:37:30 2013 UTC
@@ -136,7 +136,7 @@
   {%- elif encoding == 'utf8'-%}
     unsigned bytes_read = 0;
     const uint32_t long_char = unibrow::Utf8::CalculateValue(
-        reinterpret_cast<uint8_t*>(cursor_),
+        reinterpret_cast<const uint8_t*>(cursor_),
         buffer_end_ - cursor_,
         &bytes_read);
     cursor_ += bytes_read - 1;

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to