Revision: 18018
Author: [email protected]
Date: Fri Nov 22 14:18:15 2013 UTC
Log: Experimental parser: utf8 added to build
[email protected]
BUG=
Review URL: https://codereview.chromium.org/83583002
http://code.google.com/p/v8/source/detail?r=18018
Modified:
/branches/experimental/parser/src/lexer/lexer-shell.cc
/branches/experimental/parser/src/lexer/lexer.gyp
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Fri Nov 22 12:59:16 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Fri Nov 22 14:18:15 2013 UTC
@@ -281,6 +281,11 @@
}
if (run_experimental) {
switch (encoding) {
+ case UTF8:
+ experimental_time = RunExperimentalScanner<int8_t>(
+ fname, isolate, encoding, print_tokens || check_tokens,
+ &experimental_tokens, repeat, harmony_settings);
+ break;
case LATIN1:
experimental_time = RunExperimentalScanner<uint8_t>(
fname, isolate, encoding, print_tokens || check_tokens,
=======================================
--- /branches/experimental/parser/src/lexer/lexer.gyp Fri Nov 22 12:59:16 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer.gyp Fri Nov 22 14:18:15 2013 UTC
@@ -48,6 +48,7 @@
'lexer-shell.cc',
'<(SHARED_INTERMEDIATE_DIR)/generated_lexer_latin1.cc',
'<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf16.cc',
+ '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf8.cc',
],
'conditions': [
['v8_enable_i18n_support==1', {
@@ -94,6 +95,24 @@
'--encoding=utf16',
],
},
+ {
+ 'action_name': 'codegen_utf8',
+ 'inputs': [
+ '../../src/lexer/lexer_py.re',
+ '../../tools/lexer_generator/*.py',
+ '../../tools/lexer_generator/*.jinja',
+ ],
+ 'outputs': [
+ '<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf8.cc',
+ ],
+ 'action': [
+ 'python',
+ '../../tools/lexer_generator/generator.py',
+ '--re=../../src/lexer/lexer_py.re',
+ '--code=<(SHARED_INTERMEDIATE_DIR)/generated_lexer_utf8.cc',
+ '--encoding=utf8',
+ ],
+ },
],
},
],
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Fri Nov 22 13:16:25 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Fri Nov 22 14:18:15 2013 UTC
@@ -72,6 +72,34 @@
uncompilable code for {{type}}
{% endif -%}
{%- endmacro -%}
+
+
+{%- macro long_char_check() -%}
+ {%- if encoding == 'utf16'-%}
+ primary_char > {{upper_bound}}
+ {%- elif encoding == 'utf8'-%}
+ primary_char < 0
+ {%- else -%}
+ uncompilable code for {{encoding}}
+ {%- endif -%}
+{%- endmacro -%}
+
+
+{%- macro long_char_create() -%}
+ {%- if encoding == 'utf16'-%}
+ const uint32_t long_char = primary_char;
+ {%- elif encoding == 'utf8'-%}
+ unsigned bytes_read = 0;
+ const uint32_t long_char = unibrow::Utf8::CalculateValue(
+ reinterpret_cast<uint8_t*>(cursor_),
+ buffer_end_ - cursor_,
+ &bytes_read);
+ cursor_ += bytes_read;
+ if (long_char == unibrow::Utf8::kBadChar) goto default_action;
+ {%- else -%}
+ uncompilable code for {{encoding}}
+ {%- endif -%}
+{%- endmacro -%}
{%- macro do_dfa_state(node_number, inline) -%}
@@ -140,9 +168,8 @@
{% endfor -%}
{%- if state['long_char_transitions'] -%}
- {# TODO macro this up for utf8 #}
- if (primary_char > {{upper_bound}}) {
- uint32_t long_char = primary_char;
+ if ({{long_char_check()}}) {
+ {{long_char_create()}}
{%- for key, transition_state_id in state['long_char_transitions'] %}
if ({{do_key(key)}}) { // long_char transition
{{ do_transition(transition_state_id) }}
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.