Reviewers: marja,

Message:
Committed patchset #1 manually as r18018 (presubmit successful).

Description:
Experimental parser: utf8 added to build

[email protected]

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=18018

Please review this at https://codereview.chromium.org/83583002/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+54, -3 lines):
  M src/lexer/lexer-shell.cc
  M src/lexer/lexer.gyp
  M tools/lexer_generator/code_generator.jinja


Index: src/lexer/lexer-shell.cc
diff --git a/src/lexer/lexer-shell.cc b/src/lexer/lexer-shell.cc
index 8bb4a9659951d5ef91d15a927b97e00414cbfe17..6c72590e2745dfd728bb1b6ad4beba74192d0527 100644
--- a/src/lexer/lexer-shell.cc
+++ b/src/lexer/lexer-shell.cc
@@ -281,6 +281,11 @@ std::pair<TimeDelta, TimeDelta> ProcessFile(
   }
   if (run_experimental) {
     switch (encoding) {
+      case UTF8:
+        experimental_time = RunExperimentalScanner<int8_t>(
+            fname, isolate, encoding, print_tokens || check_tokens,
+            &experimental_tokens, repeat, harmony_settings);
+        break;
       case LATIN1:
         experimental_time = RunExperimentalScanner<uint8_t>(
             fname, isolate, encoding, print_tokens || check_tokens,
Index: src/lexer/lexer.gyp
diff --git a/src/lexer/lexer.gyp b/src/lexer/lexer.gyp
index a4049a9cae51bb2ed0924900803404c0778480a3..97668985459990ba9a9847408b42e98ddef3e3e6 100644
--- a/src/lexer/lexer.gyp
+++ b/src/lexer/lexer.gyp
@@ -48,6 +48,7 @@
         'lexer-shell.cc',
         '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_latin1.cc',
         '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf16.cc',
+        '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf8.cc',
       ],
       'conditions': [
         ['v8_enable_i18n_support==1', {
@@ -94,6 +95,24 @@
             '--encoding=utf16',
           ],
         },
+        {
+          'action_name': 'codegen_utf8',
+          'inputs': [
+            '../../src/lexer/lexer_py.re',
+            '../../tools/lexer_generator/*.py',
+            '../../tools/lexer_generator/*.jinja',
+          ],
+          'outputs': [
+            '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf8.cc',
+          ],
+          'action': [
+            'python',
+            '../../tools/lexer_generator/generator.py',
+            '--re=../../src/lexer/lexer_py.re',
+            '--code=<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf8.cc',
+            '--encoding=utf8',
+          ],
+        },
       ],
     },
   ],
Index: tools/lexer_generator/code_generator.jinja
diff --git a/tools/lexer_generator/code_generator.jinja b/tools/lexer_generator/code_generator.jinja
index 493b90bbdaeedde0674d55a3695d720c717413e5..f118166ce03c5557d95cd24c394421e2397fb344 100644
--- a/tools/lexer_generator/code_generator.jinja
+++ b/tools/lexer_generator/code_generator.jinja
@@ -74,6 +74,34 @@
 {%- endmacro -%}


+{%- macro long_char_check() -%}
+  {%- if encoding == 'utf16'-%}
+    primary_char > {{upper_bound}}
+  {%- elif encoding == 'utf8'-%}
+    primary_char < 0
+  {%- else -%}
+    uncompilable code for {{encoding}}
+  {%- endif -%}
+{%- endmacro -%}
+
+
+{%- macro long_char_create() -%}
+  {%- if encoding == 'utf16'-%}
+    const uint32_t long_char = primary_char;
+  {%- elif encoding == 'utf8'-%}
+    unsigned bytes_read = 0;
+    const uint32_t long_char = unibrow::Utf8::CalculateValue(
+        reinterpret_cast<uint8_t*>(cursor_),
+        buffer_end_ - cursor_,
+        &bytes_read);
+    cursor_ += bytes_read;
+    if (long_char == unibrow::Utf8::kBadChar) goto default_action;
+  {%- else -%}
+    uncompilable code for {{encoding}}
+  {%- endif -%}
+{%- endmacro -%}
+
+
 {%- macro do_dfa_state(node_number, inline) -%}

   {%- set state = dfa_states[node_number] -%}
@@ -140,9 +168,8 @@
   {% endfor -%}

   {%- if state['long_char_transitions'] -%}
-    {# TODO macro this up for utf8 #}
-    if (primary_char > {{upper_bound}}) {
-      uint32_t long_char = primary_char;
+    if ({{long_char_check()}}) {
+      {{long_char_create()}}
       {%- for key, transition_state_id in state['long_char_transitions'] %}
         if ({{do_key(key)}}) { // long_char transition
           {{ do_transition(transition_state_id) }}


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to