Revision: 17994
Author:   [email protected]
Date:     Fri Nov 22 09:25:13 2013 UTC
Log:      Experimental parser: cleanup after adding encodings

[email protected]

BUG=

Review URL: https://codereview.chromium.org/82953003
http://code.google.com/p/v8/source/detail?r=17994

Modified:
 /branches/experimental/parser/tools/lexer_generator/code_generator.jinja
 /branches/experimental/parser/tools/lexer_generator/code_generator.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Thu Nov 21 17:52:02 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Fri Nov 22 09:25:13 2013 UTC
@@ -3,12 +3,12 @@
 {%- macro do_key(key) -%}
   {%- for r in key -%}
     {%- if not loop.first %} || {% endif -%}
-    {%- if r[0] == 'LATIN_1' -%}
+    {%- if r[0] == 'PRIMARY_RANGE' -%}
       {%- if r[1][0] == r[1][1] -%}
         yych == {{r[1][0]}}
       {%- elif r[1][0] == 0 -%}
         yych <= {{r[1][1]}}
-      {%- elif r[1][1] == 255 and encoding == 'latin1'-%}
+      {%- elif r[1][1] == upper_bound and not encoding == 'utf16'-%}
         yych >= {{r[1][0]}}
       {%- else -%}
         ({{r[1][0]}} <= yych && yych <= {{r[1][1]}})
@@ -24,18 +24,17 @@
         {%- if r[1] == 'byte_order_mark' -%}
           (yych == 0xfffe || yych == 0xfeff)
         {%- elif r[1] == 'non_latin_1_whitespace' -%}
-          {# FIXME: Add and use unicode_cache_->InNonAsciiWhitespace #}
-          (yych > 255 && unicode_cache_->IsWhiteSpace(yych))
+          (yych > {{upper_bound}} && unicode_cache_->IsWhiteSpace(yych))
         {%- elif r[1] == 'non_latin_1_letter' -%}
-          {# FIXME: Add and use unicode_cache_->InNonAsciiLetter #}
-          (yych > 255 &&  unicode_cache_->IsLetter(yych))
+          (yych > {{upper_bound}} &&  unicode_cache_->IsLetter(yych))
         {%- elif r[1] == 'non_latin_1_identifier_part_not_letter' -%}
-          (yych > 255 &&  unicode_cache_->IsIdentifierPartNotLetter(yych))
+          (yych > {{upper_bound}} &&
+            unicode_cache_->IsIdentifierPartNotLetter(yych))
         {%- elif r[1] == 'non_latin_1_line_terminator' -%}
-          (yych > 255 &&  unicode_cache_->IsLineTerminator(yych))
+          (yych > {{upper_bound}} && unicode_cache_->IsLineTerminator(yych))
         {%- elif r[1] == 'non_latin_1_everything_else' -%}
           {# FIXME: Optimize this away #}
-          (yych > 255 &&
+          (yych > {{upper_bound}} &&
            !unicode_cache_->IsWhiteSpace(yych) &&
            !unicode_cache_->IsLetter(yych) &&
            !unicode_cache_->IsIdentifierPartNotLetter(yych) &&
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.py Fri Nov 22 09:02:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.py Fri Nov 22 09:25:13 2013 UTC
@@ -82,13 +82,13 @@

   @staticmethod
   def __range_cmp(left, right):
-    if left[0] == 'LATIN_1':
-      if right[0] == 'LATIN_1':
+    if left[0] == 'PRIMARY_RANGE':
+      if right[0] == 'PRIMARY_RANGE':
         return cmp(left[1], right[1])
       assert right[0] == 'CLASS'
       return -1
     assert left[0] == 'CLASS'
-    if right[0] == 'LATIN_1':
+    if right[0] == 'PRIMARY_RANGE':
       return 1
     # TODO store numeric values and cmp
     return cmp(left[1], right[1])
@@ -118,7 +118,7 @@
     for (t, r) in disjoint_keys:
       if t == 'CLASS':
         class_keys += 1
-      elif t == 'LATIN_1':
+      elif t == 'PRIMARY_RANGE':
         distinct_keys += r[1] - r[0] + 1
         ranges += 1
       else:
@@ -246,14 +246,15 @@
       undefined = jinja2.StrictUndefined)
     template = template_env.get_template('code_generator.jinja')

-    encoding = self.__dfa.encoding().name()
+    encoding = self.__dfa.encoding()
     char_types = {'latin1': 'uint8_t', 'utf16': 'uint16_t', 'utf8': 'int8_t'}
-    char_type = char_types[encoding]
+    char_type = char_types[encoding.name()]

     return template.render(
       start_node_number = 0,
       debug_print = self.__debug_print,
       default_action = default_action,
       dfa_states = dfa_states,
-      encoding = encoding,
-      char_type = char_type)
+      encoding = encoding.name(),
+      char_type = char_type,
+      upper_bound = encoding.primary_range()[1])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Fri Nov 22 09:02:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Fri Nov 22 09:25:13 2013 UTC
@@ -285,7 +285,7 @@
       if encoding.is_class_range(r):
         yield ('CLASS', TransitionKey.__class_name(encoding, r))
       else:
-        yield ('LATIN_1', r)
+        yield ('PRIMARY_RANGE', r)

   __printable_cache = {
     ord('\t') : '\\t',

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to