Revision: 17994
Author: [email protected]
Date: Fri Nov 22 09:25:13 2013 UTC
Log: Experimental parser: cleanup after adding encodings
[email protected]
BUG=
Review URL: https://codereview.chromium.org/82953003
http://code.google.com/p/v8/source/detail?r=17994
Modified:
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
/branches/experimental/parser/tools/lexer_generator/code_generator.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja
Thu Nov 21 17:52:02 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja
Fri Nov 22 09:25:13 2013 UTC
@@ -3,12 +3,12 @@
{%- macro do_key(key) -%}
{%- for r in key -%}
{%- if not loop.first %} || {% endif -%}
- {%- if r[0] == 'LATIN_1' -%}
+ {%- if r[0] == 'PRIMARY_RANGE' -%}
{%- if r[1][0] == r[1][1] -%}
yych == {{r[1][0]}}
{%- elif r[1][0] == 0 -%}
yych <= {{r[1][1]}}
- {%- elif r[1][1] == 255 and encoding == 'latin1'-%}
+ {%- elif r[1][1] == upper_bound and not encoding == 'utf16'-%}
yych >= {{r[1][0]}}
{%- else -%}
({{r[1][0]}} <= yych && yych <= {{r[1][1]}})
@@ -24,18 +24,17 @@
{%- if r[1] == 'byte_order_mark' -%}
(yych == 0xfffe || yych == 0xfeff)
{%- elif r[1] == 'non_latin_1_whitespace' -%}
- {# FIXME: Add and use unicode_cache_->InNonAsciiWhitespace #}
- (yych > 255 && unicode_cache_->IsWhiteSpace(yych))
+ (yych > {{upper_bound}} && unicode_cache_->IsWhiteSpace(yych))
{%- elif r[1] == 'non_latin_1_letter' -%}
- {# FIXME: Add and use unicode_cache_->InNonAsciiLetter #}
- (yych > 255 && unicode_cache_->IsLetter(yych))
+ (yych > {{upper_bound}} && unicode_cache_->IsLetter(yych))
{%- elif r[1] == 'non_latin_1_identifier_part_not_letter' -%}
- (yych > 255 && unicode_cache_->IsIdentifierPartNotLetter(yych))
+ (yych > {{upper_bound}} &&
+ unicode_cache_->IsIdentifierPartNotLetter(yych))
{%- elif r[1] == 'non_latin_1_line_terminator' -%}
- (yych > 255 && unicode_cache_->IsLineTerminator(yych))
+ (yych > {{upper_bound}} && unicode_cache_->IsLineTerminator(yych))
{%- elif r[1] == 'non_latin_1_everything_else' -%}
{# FIXME: Optimize this away #}
- (yych > 255 &&
+ (yych > {{upper_bound}} &&
!unicode_cache_->IsWhiteSpace(yych) &&
!unicode_cache_->IsLetter(yych) &&
!unicode_cache_->IsIdentifierPartNotLetter(yych) &&
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.py
Fri Nov 22 09:02:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.py
Fri Nov 22 09:25:13 2013 UTC
@@ -82,13 +82,13 @@
@staticmethod
def __range_cmp(left, right):
- if left[0] == 'LATIN_1':
- if right[0] == 'LATIN_1':
+ if left[0] == 'PRIMARY_RANGE':
+ if right[0] == 'PRIMARY_RANGE':
return cmp(left[1], right[1])
assert right[0] == 'CLASS'
return -1
assert left[0] == 'CLASS'
- if right[0] == 'LATIN_1':
+ if right[0] == 'PRIMARY_RANGE':
return 1
# TODO store numeric values and cmp
return cmp(left[1], right[1])
@@ -118,7 +118,7 @@
for (t, r) in disjoint_keys:
if t == 'CLASS':
class_keys += 1
- elif t == 'LATIN_1':
+ elif t == 'PRIMARY_RANGE':
distinct_keys += r[1] - r[0] + 1
ranges += 1
else:
@@ -246,14 +246,15 @@
undefined = jinja2.StrictUndefined)
template = template_env.get_template('code_generator.jinja')
- encoding = self.__dfa.encoding().name()
+ encoding = self.__dfa.encoding()
char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}
- char_type = char_types[encoding]
+ char_type = char_types[encoding.name()]
return template.render(
start_node_number = 0,
debug_print = self.__debug_print,
default_action = default_action,
dfa_states = dfa_states,
- encoding = encoding,
- char_type = char_type)
+ encoding = encoding.name(),
+ char_type = char_type,
+ upper_bound = encoding.primary_range()[1])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Fri Nov 22 09:02:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Fri Nov 22 09:25:13 2013 UTC
@@ -285,7 +285,7 @@
if encoding.is_class_range(r):
yield ('CLASS', TransitionKey.__class_name(encoding, r))
else:
- yield ('LATIN_1', r)
+ yield ('PRIMARY_RANGE', r)
__printable_cache = {
ord('\t') : '\\t',
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.