Revision: 19301
Author:   [email protected]
Date:     Wed Feb 12 07:46:44 2014 UTC
Log:      Experimental parser: break KeyEncoding off into its own file

[email protected]

BUG=

Review URL: https://codereview.chromium.org/159753009
http://code.google.com/p/v8/source/detail?r=19301

Added:
 /branches/experimental/parser/tools/lexer_generator/encoding.py
Modified:
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/encoding.py Wed Feb 12 07:46:44 2014 UTC
@@ -0,0 +1,194 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import IntType
+from itertools import chain
+from string import printable
+from action import Term
+
+class KeyEncoding(object):
+
+  __encodings = {}
+
+  __printable_cache = {
+    ord('\t') : '\\t',
+    ord('\n') : '\\n',
+    ord('\r') : '\\r',
+  }
+
+  @staticmethod
+  def to_str(encoding, x):
+    assert not encoding or encoding.in_primary_range(x, x)
+    if x > 127:
+      return str(x)
+    if not x in KeyEncoding.__printable_cache:
+      res = "'%s'" % chr(x) if chr(x) in printable else str(x)
+      KeyEncoding.__printable_cache[x] = res
+    return KeyEncoding.__printable_cache[x]
+
+  @staticmethod
+  def get(name):
+    if not KeyEncoding.__encodings:
+      Latin1Encoding()
+      Utf16Encoding()
+      Utf8Encoding()
+    return KeyEncoding.__encodings[name]
+
+  def __init__(self, name, primary_range, named_ranges, predefined_ranges):
+    assert not name in KeyEncoding.__encodings
+    assert primary_range[0] <= primary_range[1]
+    KeyEncoding.__encodings[name] = self
+    self.__name = name
+    self.__primary_range = primary_range
+    self.__lower_bound = primary_range[0]
+    self.__upper_bound = primary_range[1]
+    self.__primary_range_component = self.numeric_range_term(primary_range[0],
+                                                             primary_range[1])
+    self.__named_ranges = {
+      k : Term('NAMED_RANGE_KEY', k) for k in named_ranges }
+    def f(v):
+      if len(v) == 2:
+        return self.numeric_range_term(v[0], v[1])
+      elif len(v) == 1:
+        assert v[0] in self.__named_ranges
+        return self.__named_ranges[v[0]]
+      raise Exception('bad args %s' % str(v))
+    self.__predefined_ranges = {
+      k : map(f, v) for k, v in predefined_ranges.iteritems() }
+
+  def name(self):
+    return self.__name
+
+  def lower_bound(self):
+    return self.__lower_bound
+
+  def upper_bound(self):
+    return self.__upper_bound
+
+  def primary_range(self):
+    return self.__primary_range
+
+  def named_range(self, name):
+    ranges = self.__named_ranges
+    return Term.empty_term() if not name in ranges else ranges[name]
+
+  def named_range_iter(self):
+    return self.__named_range.iteritems()
+
+  def named_range_key_iter(self):
+    return self.__named_ranges.iterkeys()
+
+  def named_range_value_iter(self):
+    return self.__named_ranges.itervalues()
+
+  def predefined_range_iter(self, name):
+    ranges = self.__predefined_ranges
+    return None if not name in ranges else iter(ranges[name])
+
+  def __primary_range_iter(self):
+    yield self.__primary_range_component
+
+  def all_components_iter(self):
+    return chain(self.__primary_range_iter(), self.__named_ranges.itervalues())
+
+  def is_primary_range(self, r):
+    assert len(r) == 2
+    return self.in_primary_range(r[0], r[1])
+
+  def in_primary_range(self, a, b):
+    return self.lower_bound() <= a and b <= self.upper_bound()
+
+  def numeric_range_term(self, a, b):
+    assert type(a) == IntType and type(b) == IntType
+    assert self.in_primary_range(a, b)
+    return Term('NUMERIC_RANGE_KEY', a, b)
+
+class Latin1Encoding(KeyEncoding):
+
+  def __init__(self):
+    super(Latin1Encoding, self).__init__(
+      'latin1',
+      (0, 255),
+      [],
+      {
+        'whitespace':
+          [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160)],
+        'letter':
+          [(65, 90), (97, 122), (170, 170), (181, 181),
+           (186, 186), (192, 214), (216, 246), (248, 255)],
+        'line_terminator':
+          [(10, 10), (13, 13)],
+        'identifier_part_not_letter':
+          [(48, 57), (95, 95)]
+      })
+
+class Utf16Encoding(KeyEncoding):
+
+  def __init__(self):
+    super(Utf16Encoding, self).__init__(
+      'utf16',
+      (0, 255),
+      ['non_primary_whitespace',
+       'non_primary_letter',
+       'non_primary_identifier_part_not_letter',
+       'non_primary_line_terminator',
+       'non_primary_everything_else'],
+      {
+        'whitespace':
+          [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
+           ('non_primary_whitespace',)],
+        'letter':
+          [(65, 90), (97, 122), (170, 170), (181, 181),
+           (186, 186), (192, 214), (216, 246), (248, 255),
+           ('non_primary_letter',)],
+        'line_terminator':
+          [(10, 10), (13, 13), ('non_primary_line_terminator',)],
+        'identifier_part_not_letter':
+          [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
+      })
+
+class Utf8Encoding(KeyEncoding):
+
+  def __init__(self):
+    super(Utf8Encoding, self).__init__(
+      'utf8',
+      (0, 127),
+      ['non_primary_whitespace',
+       'non_primary_letter',
+       'non_primary_identifier_part_not_letter',
+       'non_primary_line_terminator',
+       'non_primary_everything_else'],
+      {
+        'whitespace':
+          [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)],
+        'letter':
+          [(65, 90), (97, 122), ('non_primary_letter',)],
+        'line_terminator':
+          [(10, 10), (13, 13), ('non_primary_line_terminator',)],
+        'identifier_part_not_letter':
+          [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
+      })
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Feb 11 19:02:48 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Wed Feb 12 07:46:44 2014 UTC
@@ -25,92 +25,9 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-from types import IntType
 from itertools import chain
+from encoding import KeyEncoding
 from action import Term
-from string import printable
-
-class KeyEncoding(object):
-
-  __encodings = {}
-
-  @staticmethod
-  def get(name):
-    if not KeyEncoding.__encodings:
-      Latin1Encoding()
-      Utf16Encoding()
-      Utf8Encoding()
-    return KeyEncoding.__encodings[name]
-
-  def __init__(self, name, primary_range, named_ranges, predefined_ranges):
-    assert not name in KeyEncoding.__encodings
-    assert primary_range[0] <= primary_range[1]
-    KeyEncoding.__encodings[name] = self
-    self.__name = name
-    self.__primary_range = primary_range
-    self.__lower_bound = primary_range[0]
-    self.__upper_bound = primary_range[1]
-    self.__primary_range_component = self.numeric_range_term(primary_range[0],
-                                                             primary_range[1])
-    self.__named_ranges = {
-      k : Term('NAMED_RANGE_KEY', k) for k in named_ranges }
-    def f(v):
-      if len(v) == 2:
-        return self.numeric_range_term(v[0], v[1])
-      elif len(v) == 1:
-        assert v[0] in self.__named_ranges
-        return self.__named_ranges[v[0]]
-      else:
-        raise Exception()
-    self.__predefined_ranges = {
-      k : map(f, v) for k, v in predefined_ranges.iteritems() }
-
-  def name(self):
-    return self.__name
-
-  def lower_bound(self):
-    return self.__lower_bound
-
-  def upper_bound(self):
-    return self.__upper_bound
-
-  def primary_range(self):
-    return self.__primary_range
-
-  def named_range(self, name):
-    ranges = self.__named_ranges
-    return Term.empty_term() if not name in ranges else ranges[name]
-
-  def named_range_iter(self):
-    return self.__named_range.iteritems()
-
-  def named_range_key_iter(self):
-    return self.__named_ranges.iterkeys()
-
-  def named_range_value_iter(self):
-    return self.__named_ranges.itervalues()
-
-  def predefined_range_iter(self, name):
-    ranges = self.__predefined_ranges
-    return None if not name in ranges else iter(ranges[name])
-
-  def __primary_range_iter(self):
-    yield self.__primary_range_component
-
-  def all_components_iter(self):
-    return chain(self.__primary_range_iter(), self.__named_ranges.itervalues())
-
-  def is_primary_range(self, r):
-    assert len(r) == 2
-    return self.in_primary_range(r[0], r[1])
-
-  def in_primary_range(self, a, b):
-    return self.lower_bound() <= a and b <= self.upper_bound()
-
-  def numeric_range_term(self, a, b):
-    assert type(a) == IntType and type(b) == IntType
-    assert self.in_primary_range(a, b)
-    return Term('NUMERIC_RANGE_KEY', a, b)

 class TransitionKey(object):
   '''Represents a transition from a state in DFA or NFA to another state.
@@ -299,20 +216,6 @@
   def __eq__(self, other):
     return isinstance(other, TransitionKey) and self.__term == other.__term

-  @staticmethod
-  def __class_name(encoding, r):
-    for name, v in encoding.class_range_iter():
-      if r == v:
-        return name
-    assert False
-
-  @staticmethod
-  def __unique_name(r):
-    for name, v in TransitionKey.__cached_keys['no_encoding'].items():
-      if v.__ranges and r == v.__ranges[0]:
-        return name[2:]
-    assert False
-
   def range_iter(self, encoding):
     for c in self.__flatten():
       if c.name() == 'NUMERIC_RANGE_KEY':
@@ -324,12 +227,6 @@
       else:
         assert False, 'unimplemented %s' % c

-  __printable_cache = {
-    ord('\t') : '\\t',
-    ord('\n') : '\\n',
-    ord('\r') : '\\r',
-  }
-
   @staticmethod
   def __component_str(encoding, component):
     if component.name() == 'TERM_KEY':
@@ -340,21 +237,13 @@
       return 'epsilon'
     elif component.name() == 'OMEGA_KEY':
       return 'omega'
-    elif component.name() != 'NUMERIC_RANGE_KEY':
-      raise Exception('unprintable %s' % component)
-    r = component.args()
-    def to_str(x):
-      assert not encoding or encoding.in_primary_range(x, x)
-      if x > 127:
-        return str(x)
-      if not x in TransitionKey.__printable_cache:
-        res = "'%s'" % chr(x) if chr(x) in printable else str(x)
-        TransitionKey.__printable_cache[x] = res
-      return TransitionKey.__printable_cache[x]
-    if r[0] == r[1]:
-      return '%s' % to_str(r[0])
-    else:
+    elif component.name() == 'NUMERIC_RANGE_KEY':
+      r = component.args()
+      to_str = lambda x: KeyEncoding.to_str(encoding, x)
+      if r[0] == r[1]:
+        return '%s' % to_str(r[0])
       return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))
+    raise Exception('unprintable %s' % component)

   def __flatten(self):
     return self.__flatten_components([self.__term])
@@ -416,6 +305,8 @@

   @staticmethod
   def __construct(encoding, components):
+    if isinstance(components, Term):
+      components = [components]
     is_unique = False
     acc = []
     last = Term.empty_term()
@@ -441,8 +332,6 @@
     return acc[0] if len(acc) == 1 else Term('COMPOSITE_KEY', *acc)

   def __init__(self, encoding, components):
-    if isinstance(components, Term):
-      components = [components]
     self.__term = TransitionKey.__construct(encoding, components)
     self.__cached_hash = None

@@ -559,8 +448,7 @@

   @staticmethod
   def __invert_components(encoding, components):
-    def key(x, y):
-      return encoding.numeric_range_term(x, y)
+    key = lambda x, y: encoding.numeric_range_term(x, y)
     last = None
     classes = set(encoding.named_range_value_iter())
     for c in components:
@@ -584,69 +472,3 @@
       yield key(last[1] + 1, upper_bound)
     for c in sorted(classes, TransitionKey.__component_compare):
       yield c
-
-class Latin1Encoding(KeyEncoding):
-
-  def __init__(self):
-    super(Latin1Encoding, self).__init__(
-      'latin1',
-      (0, 255),
-      [],
-      {
-        'whitespace':
-          [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160)],
-        'letter':
-          [(65, 90), (97, 122), (170, 170), (181, 181),
-           (186, 186), (192, 214), (216, 246), (248, 255)],
-        'line_terminator':
-          [(10, 10), (13, 13)],
-        'identifier_part_not_letter':
-          [(48, 57), (95, 95)]
-      })
-
-class Utf16Encoding(KeyEncoding):
-
-  def __init__(self):
-    super(Utf16Encoding, self).__init__(
-      'utf16',
-      (0, 255),
-      ['non_primary_whitespace',
-       'non_primary_letter',
-       'non_primary_identifier_part_not_letter',
-       'non_primary_line_terminator',
-       'non_primary_everything_else'],
-      {
-        'whitespace':
-          [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
-           ('non_primary_whitespace',)],
-        'letter':
-          [(65, 90), (97, 122), (170, 170), (181, 181),
-           (186, 186), (192, 214), (216, 246), (248, 255),
-           ('non_primary_letter',)],
-        'line_terminator':
-          [(10, 10), (13, 13), ('non_primary_line_terminator',)],
-        'identifier_part_not_letter':
-          [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
-      })
-
-class Utf8Encoding(KeyEncoding):
-
-  def __init__(self):
-    super(Utf8Encoding, self).__init__(
-      'utf8',
-      (0, 127),
-      ['non_primary_whitespace',
-       'non_primary_letter',
-       'non_primary_identifier_part_not_letter',
-       'non_primary_line_terminator',
-       'non_primary_everything_else'],
-      {
-        'whitespace':
-          [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)],
-        'letter':
-          [(65, 90), (97, 122), ('non_primary_letter',)],
-        'line_terminator':
-          [(10, 10), (13, 13), ('non_primary_line_terminator',)],
-        'identifier_part_not_letter':
-          [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
-      })

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to