[v8-dev] Experimental lexer: track one-byteness on the fly. (issue 136793025)

marja Fri, 17 Jan 2014 08:31:43 -0800

Reviewers: dcarney, ulan,

Message:
ptal



Description:
Experimental lexer: track one-byteness on the fly.

BUG=
[email protected],[email protected]

Please review this at https://codereview.chromium.org/136793025/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+14, -12 lines):
  M src/lexer/experimental-scanner.h
  M src/lexer/experimental-scanner.cc
  M tools/lexer_generator/code_generator.jinja


Index: src/lexer/experimental-scanner.cc

diff --git a/src/lexer/experimental-scanner.cc b/src/lexer/experimental-scanner.cc
index d354908e6c4619d134af34616e08c6a670ee2f78..28aaf6c0ac0884bf973f107fc63f7c1ceb4038b9 100644

--- a/src/lexer/experimental-scanner.cc
+++ b/src/lexer/experimental-scanner.cc
@@ -106,28 +106,21 @@ bool ExperimentalScanner<uint16_t>::FillLiteral(
     ++start;
     --end;
   }
+  literal->buffer.Reset();
   if (!token.has_escapes) {
     // UTF-16 can also contain only one byte chars. Note that is_ascii here
     // means is_onebyte.
-    literal->is_ascii = true;
-    literal->buffer.Reset();
-    for (const uint16_t* cursor = start; cursor != end; ++cursor) {
-      if (*cursor >= unibrow::Latin1::kMaxChar) {
-        literal->is_ascii = false;
-        break;
-      }
-      literal->buffer.AddChar(*cursor);
-    }
     literal->length = end - start;
-    if (literal->is_ascii) {
+    literal->is_ascii = token.is_onebyte;
+    if (token.is_onebyte) {
+      for (const uint16_t* cursor = start; cursor != end; ++cursor)
+        literal->buffer.AddChar(*cursor);
       literal->ascii_string = literal->buffer.ascii_literal();
     } else {
-      literal->buffer.Reset();
       literal->utf16_string = Vector<const uint16_t>(start, literal->length);
     }
     return true;
   }
-  literal->buffer.Reset();
   for (const uint16_t* cursor = start; cursor != end;) {
     if (*cursor != '\\') {
       literal->buffer.AddChar(*cursor++);
Index: src/lexer/experimental-scanner.h

diff --git a/src/lexer/experimental-scanner.h b/src/lexer/experimental-scanner.h
index 8a9a96e1e4df2560ba0841ccd86d1201b60030d6..a0fa0006206508514cfc732363300b53d09a2606 100644

--- a/src/lexer/experimental-scanner.h
+++ b/src/lexer/experimental-scanner.h
@@ -238,6 +238,7 @@ class ScannerBase {
     int beg_pos;
     int end_pos;
     bool has_escapes;
+    bool is_onebyte;
   };

   struct LiteralDesc {
Index: tools/lexer_generator/code_generator.jinja

diff --git a/tools/lexer_generator/code_generator.jinja b/tools/lexer_generator/code_generator.jinja
index 67ccb56a3d60b5670a40d3597e98589d77d0e041..d5951785a978d6089a7aa543d996e0b35b41c502 100644

--- a/tools/lexer_generator/code_generator.jinja
+++ b/tools/lexer_generator/code_generator.jinja
@@ -105,6 +105,9 @@
     {{dispatch_match_action('backtrack', ('1', 'ILLEGAL'))}}
   {% elif type == 'skip' %}
     RESET_START();
+    {%- if encoding == 'utf16'-%}
+      next_.is_onebyte = true;
+    {%- endif -%}
     goto state_entry_0;
   {% elif type == 'skip_and_terminate' %}
     RESET_START();
@@ -112,6 +115,9 @@
     {{dispatch_match_action('terminate', None)}}
   {% elif type == 'line_terminator' %}
     RESET_START();
+    {%- if encoding == 'utf16'-%}
+      next_.is_onebyte = true;
+    {%- endif -%}
     has_line_terminator_before_next_ = true;
     goto state_entry_0;
   {% elif type == 'token' %}
@@ -264,6 +270,7 @@

   {%- if state['long_char_transitions'] -%}
     if ({{long_char_check()}}) {
+      next_.is_onebyte = false;
       {{long_char_create()}}
       {%- for key, transition_state_id in state['long_char_transitions'] %}
         if ({{do_key(key)}}) { // long_char transition
@@ -323,6 +330,7 @@ void ExperimentalScanner<{{char_type}}>::Scan() {

   // Setup environment.
   next_.has_escapes = false;
+  next_.is_onebyte = true;
   Token::Value stored_token;
   const {{char_type}} * marker;
   {{char_type}} primary_char;


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

---
You received this message because you are subscribed to the Google Groups "v8-dev" group.

To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

[v8-dev] Experimental lexer: track one-byteness on the fly. (issue 136793025)

Reply via email to