Revision: 18018
Author:   [email protected]
Date:     Fri Nov 22 14:18:15 2013 UTC
Log:      Experimental parser: utf8 added to build

[email protected]

BUG=

Review URL: https://codereview.chromium.org/83583002
http://code.google.com/p/v8/source/detail?r=18018

Modified:
 /branches/experimental/parser/src/lexer/lexer-shell.cc
 /branches/experimental/parser/src/lexer/lexer.gyp
 /branches/experimental/parser/tools/lexer_generator/code_generator.jinja

=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Fri Nov 22 12:59:16 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Fri Nov 22 14:18:15 2013 UTC
@@ -281,6 +281,11 @@
   }
   if (run_experimental) {
     switch (encoding) {
+      case UTF8:
+        experimental_time = RunExperimentalScanner<int8_t>(
+            fname, isolate, encoding, print_tokens || check_tokens,
+            &experimental_tokens, repeat, harmony_settings);
+        break;
       case LATIN1:
         experimental_time = RunExperimentalScanner<uint8_t>(
             fname, isolate, encoding, print_tokens || check_tokens,
=======================================
--- /branches/experimental/parser/src/lexer/lexer.gyp Fri Nov 22 12:59:16 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.gyp Fri Nov 22 14:18:15 2013 UTC
@@ -48,6 +48,7 @@
         'lexer-shell.cc',
         '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_latin1.cc',
         '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf16.cc',
+        '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf8.cc',
       ],
       'conditions': [
         ['v8_enable_i18n_support==1', {
@@ -94,6 +95,24 @@
             '--encoding=utf16',
           ],
         },
+        {
+          'action_name': 'codegen_utf8',
+          'inputs': [
+            '../../src/lexer/lexer_py.re',
+            '../../tools/lexer_generator/*.py',
+            '../../tools/lexer_generator/*.jinja',
+          ],
+          'outputs': [
+            '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf8.cc',
+          ],
+          'action': [
+            'python',
+            '../../tools/lexer_generator/generator.py',
+            '--re=../../src/lexer/lexer_py.re',
+            '--code=<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf8.cc',
+            '--encoding=utf8',
+          ],
+        },
       ],
     },
   ],
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Fri Nov 22 13:16:25 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Fri Nov 22 14:18:15 2013 UTC
@@ -72,6 +72,34 @@
     uncompilable code for {{type}}
   {% endif -%}
 {%- endmacro -%}
+
+
+{%- macro long_char_check() -%}
+  {%- if encoding == 'utf16'-%}
+    primary_char > {{upper_bound}}
+  {%- elif encoding == 'utf8'-%}
+    primary_char < 0
+  {%- else -%}
+    uncompilable code for {{encoding}}
+  {%- endif -%}
+{%- endmacro -%}
+
+
+{%- macro long_char_create() -%}
+  {%- if encoding == 'utf16'-%}
+    const uint32_t long_char = primary_char;
+  {%- elif encoding == 'utf8'-%}
+    unsigned bytes_read = 0;
+    const uint32_t long_char = unibrow::Utf8::CalculateValue(
+        reinterpret_cast<uint8_t*>(cursor_),
+        buffer_end_ - cursor_,
+        &bytes_read);
+    cursor_ += bytes_read;
+    if (long_char == unibrow::Utf8::kBadChar) goto default_action;
+  {%- else -%}
+    uncompilable code for {{encoding}}
+  {%- endif -%}
+{%- endmacro -%}


 {%- macro do_dfa_state(node_number, inline) -%}
@@ -140,9 +168,8 @@
   {% endfor -%}

   {%- if state['long_char_transitions'] -%}
-    {# TODO macro this up for utf8 #}
-    if (primary_char > {{upper_bound}}) {
-      uint32_t long_char = primary_char;
+    if ({{long_char_check()}}) {
+      {{long_char_create()}}
       {%- for key, transition_state_id in state['long_char_transitions'] %}
         if ({{do_key(key)}}) { // long_char transition
           {{ do_transition(transition_state_id) }}

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to