Revision: 19301
Author: [email protected]
Date: Wed Feb 12 07:46:44 2014 UTC
Log: Experimental parser: break KeyEncoding off into its own file
[email protected]
BUG=
Review URL: https://codereview.chromium.org/159753009
http://code.google.com/p/v8/source/detail?r=19301
Added:
/branches/experimental/parser/tools/lexer_generator/encoding.py
Modified:
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/encoding.py Wed Feb 12 07:46:44 2014 UTC
@@ -0,0 +1,194 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import IntType
+from itertools import chain
+from string import printable
+from action import Term
+
+class KeyEncoding(object):
+
+ __encodings = {}
+
+ __printable_cache = {
+ ord('\t') : '\\t',
+ ord('\n') : '\\n',
+ ord('\r') : '\\r',
+ }
+
+ @staticmethod
+ def to_str(encoding, x):
+ assert not encoding or encoding.in_primary_range(x, x)
+ if x > 127:
+ return str(x)
+ if not x in KeyEncoding.__printable_cache:
+ res = "'%s'" % chr(x) if chr(x) in printable else str(x)
+ KeyEncoding.__printable_cache[x] = res
+ return KeyEncoding.__printable_cache[x]
+
+ @staticmethod
+ def get(name):
+ if not KeyEncoding.__encodings:
+ Latin1Encoding()
+ Utf16Encoding()
+ Utf8Encoding()
+ return KeyEncoding.__encodings[name]
+
+ def __init__(self, name, primary_range, named_ranges, predefined_ranges):
+ assert not name in KeyEncoding.__encodings
+ assert primary_range[0] <= primary_range[1]
+ KeyEncoding.__encodings[name] = self
+ self.__name = name
+ self.__primary_range = primary_range
+ self.__lower_bound = primary_range[0]
+ self.__upper_bound = primary_range[1]
+ self.__primary_range_component = self.numeric_range_term(primary_range[0],
+ primary_range[1])
+ self.__named_ranges = {
+ k : Term('NAMED_RANGE_KEY', k) for k in named_ranges }
+ def f(v):
+ if len(v) == 2:
+ return self.numeric_range_term(v[0], v[1])
+ elif len(v) == 1:
+ assert v[0] in self.__named_ranges
+ return self.__named_ranges[v[0]]
+ raise Exception('bad args %s' % str(v))
+ self.__predefined_ranges = {
+ k : map(f, v) for k, v in predefined_ranges.iteritems() }
+
+ def name(self):
+ return self.__name
+
+ def lower_bound(self):
+ return self.__lower_bound
+
+ def upper_bound(self):
+ return self.__upper_bound
+
+ def primary_range(self):
+ return self.__primary_range
+
+ def named_range(self, name):
+ ranges = self.__named_ranges
+ return Term.empty_term() if not name in ranges else ranges[name]
+
+ def named_range_iter(self):
+ return self.__named_range.iteritems()
+
+ def named_range_key_iter(self):
+ return self.__named_ranges.iterkeys()
+
+ def named_range_value_iter(self):
+ return self.__named_ranges.itervalues()
+
+ def predefined_range_iter(self, name):
+ ranges = self.__predefined_ranges
+ return None if not name in ranges else iter(ranges[name])
+
+ def __primary_range_iter(self):
+ yield self.__primary_range_component
+
+ def all_components_iter(self):
+ return chain(self.__primary_range_iter(), self.__named_ranges.itervalues())
+
+ def is_primary_range(self, r):
+ assert len(r) == 2
+ return self.in_primary_range(r[0], r[1])
+
+ def in_primary_range(self, a, b):
+ return self.lower_bound() <= a and b <= self.upper_bound()
+
+ def numeric_range_term(self, a, b):
+ assert type(a) == IntType and type(b) == IntType
+ assert self.in_primary_range(a, b)
+ return Term('NUMERIC_RANGE_KEY', a, b)
+
+class Latin1Encoding(KeyEncoding):
+
+ def __init__(self):
+ super(Latin1Encoding, self).__init__(
+ 'latin1',
+ (0, 255),
+ [],
+ {
+ 'whitespace':
+ [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160)],
+ 'letter':
+ [(65, 90), (97, 122), (170, 170), (181, 181),
+ (186, 186), (192, 214), (216, 246), (248, 255)],
+ 'line_terminator':
+ [(10, 10), (13, 13)],
+ 'identifier_part_not_letter':
+ [(48, 57), (95, 95)]
+ })
+
+class Utf16Encoding(KeyEncoding):
+
+ def __init__(self):
+ super(Utf16Encoding, self).__init__(
+ 'utf16',
+ (0, 255),
+ ['non_primary_whitespace',
+ 'non_primary_letter',
+ 'non_primary_identifier_part_not_letter',
+ 'non_primary_line_terminator',
+ 'non_primary_everything_else'],
+ {
+ 'whitespace':
+ [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
+ ('non_primary_whitespace',)],
+ 'letter':
+ [(65, 90), (97, 122), (170, 170), (181, 181),
+ (186, 186), (192, 214), (216, 246), (248, 255),
+ ('non_primary_letter',)],
+ 'line_terminator':
+ [(10, 10), (13, 13), ('non_primary_line_terminator',)],
+ 'identifier_part_not_letter':
+ [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
+ })
+
+class Utf8Encoding(KeyEncoding):
+
+ def __init__(self):
+ super(Utf8Encoding, self).__init__(
+ 'utf8',
+ (0, 127),
+ ['non_primary_whitespace',
+ 'non_primary_letter',
+ 'non_primary_identifier_part_not_letter',
+ 'non_primary_line_terminator',
+ 'non_primary_everything_else'],
+ {
+ 'whitespace':
+ [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)],
+ 'letter':
+ [(65, 90), (97, 122), ('non_primary_letter',)],
+ 'line_terminator':
+ [(10, 10), (13, 13), ('non_primary_line_terminator',)],
+ 'identifier_part_not_letter':
+ [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
+ })
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Feb 11 19:02:48 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Wed Feb 12 07:46:44 2014 UTC
@@ -25,92 +25,9 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from types import IntType
from itertools import chain
+from encoding import KeyEncoding
from action import Term
-from string import printable
-
-class KeyEncoding(object):
-
- __encodings = {}
-
- @staticmethod
- def get(name):
- if not KeyEncoding.__encodings:
- Latin1Encoding()
- Utf16Encoding()
- Utf8Encoding()
- return KeyEncoding.__encodings[name]
-
- def __init__(self, name, primary_range, named_ranges, predefined_ranges):
- assert not name in KeyEncoding.__encodings
- assert primary_range[0] <= primary_range[1]
- KeyEncoding.__encodings[name] = self
- self.__name = name
- self.__primary_range = primary_range
- self.__lower_bound = primary_range[0]
- self.__upper_bound = primary_range[1]
- self.__primary_range_component = self.numeric_range_term(primary_range[0],
- primary_range[1])
- self.__named_ranges = {
- k : Term('NAMED_RANGE_KEY', k) for k in named_ranges }
- def f(v):
- if len(v) == 2:
- return self.numeric_range_term(v[0], v[1])
- elif len(v) == 1:
- assert v[0] in self.__named_ranges
- return self.__named_ranges[v[0]]
- else:
- raise Exception()
- self.__predefined_ranges = {
- k : map(f, v) for k, v in predefined_ranges.iteritems() }
-
- def name(self):
- return self.__name
-
- def lower_bound(self):
- return self.__lower_bound
-
- def upper_bound(self):
- return self.__upper_bound
-
- def primary_range(self):
- return self.__primary_range
-
- def named_range(self, name):
- ranges = self.__named_ranges
- return Term.empty_term() if not name in ranges else ranges[name]
-
- def named_range_iter(self):
- return self.__named_range.iteritems()
-
- def named_range_key_iter(self):
- return self.__named_ranges.iterkeys()
-
- def named_range_value_iter(self):
- return self.__named_ranges.itervalues()
-
- def predefined_range_iter(self, name):
- ranges = self.__predefined_ranges
- return None if not name in ranges else iter(ranges[name])
-
- def __primary_range_iter(self):
- yield self.__primary_range_component
-
- def all_components_iter(self):
- return chain(self.__primary_range_iter(), self.__named_ranges.itervalues())
-
- def is_primary_range(self, r):
- assert len(r) == 2
- return self.in_primary_range(r[0], r[1])
-
- def in_primary_range(self, a, b):
- return self.lower_bound() <= a and b <= self.upper_bound()
-
- def numeric_range_term(self, a, b):
- assert type(a) == IntType and type(b) == IntType
- assert self.in_primary_range(a, b)
- return Term('NUMERIC_RANGE_KEY', a, b)
class TransitionKey(object):
'''Represents a transition from a state in DFA or NFA to another state.
@@ -299,20 +216,6 @@
def __eq__(self, other):
return isinstance(other, TransitionKey) and self.__term == other.__term
- @staticmethod
- def __class_name(encoding, r):
- for name, v in encoding.class_range_iter():
- if r == v:
- return name
- assert False
-
- @staticmethod
- def __unique_name(r):
- for name, v in TransitionKey.__cached_keys['no_encoding'].items():
- if v.__ranges and r == v.__ranges[0]:
- return name[2:]
- assert False
-
def range_iter(self, encoding):
for c in self.__flatten():
if c.name() == 'NUMERIC_RANGE_KEY':
@@ -324,12 +227,6 @@
else:
assert False, 'unimplemented %s' % c
- __printable_cache = {
- ord('\t') : '\\t',
- ord('\n') : '\\n',
- ord('\r') : '\\r',
- }
-
@staticmethod
def __component_str(encoding, component):
if component.name() == 'TERM_KEY':
@@ -340,21 +237,13 @@
return 'epsilon'
elif component.name() == 'OMEGA_KEY':
return 'omega'
- elif component.name() != 'NUMERIC_RANGE_KEY':
- raise Exception('unprintable %s' % component)
- r = component.args()
- def to_str(x):
- assert not encoding or encoding.in_primary_range(x, x)
- if x > 127:
- return str(x)
- if not x in TransitionKey.__printable_cache:
- res = "'%s'" % chr(x) if chr(x) in printable else str(x)
- TransitionKey.__printable_cache[x] = res
- return TransitionKey.__printable_cache[x]
- if r[0] == r[1]:
- return '%s' % to_str(r[0])
- else:
+ elif component.name() == 'NUMERIC_RANGE_KEY':
+ r = component.args()
+ to_str = lambda x: KeyEncoding.to_str(encoding, x)
+ if r[0] == r[1]:
+ return '%s' % to_str(r[0])
return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))
+ raise Exception('unprintable %s' % component)
def __flatten(self):
return self.__flatten_components([self.__term])
@@ -416,6 +305,8 @@
@staticmethod
def __construct(encoding, components):
+ if isinstance(components, Term):
+ components = [components]
is_unique = False
acc = []
last = Term.empty_term()
@@ -441,8 +332,6 @@
return acc[0] if len(acc) == 1 else Term('COMPOSITE_KEY', *acc)
def __init__(self, encoding, components):
- if isinstance(components, Term):
- components = [components]
self.__term = TransitionKey.__construct(encoding, components)
self.__cached_hash = None
@@ -559,8 +448,7 @@
@staticmethod
def __invert_components(encoding, components):
- def key(x, y):
- return encoding.numeric_range_term(x, y)
+ key = lambda x, y: encoding.numeric_range_term(x, y)
last = None
classes = set(encoding.named_range_value_iter())
for c in components:
@@ -584,69 +472,3 @@
yield key(last[1] + 1, upper_bound)
for c in sorted(classes, TransitionKey.__component_compare):
yield c
-
-class Latin1Encoding(KeyEncoding):
-
- def __init__(self):
- super(Latin1Encoding, self).__init__(
- 'latin1',
- (0, 255),
- [],
- {
- 'whitespace':
- [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160)],
- 'letter':
- [(65, 90), (97, 122), (170, 170), (181, 181),
- (186, 186), (192, 214), (216, 246), (248, 255)],
- 'line_terminator':
- [(10, 10), (13, 13)],
- 'identifier_part_not_letter':
- [(48, 57), (95, 95)]
- })
-
-class Utf16Encoding(KeyEncoding):
-
- def __init__(self):
- super(Utf16Encoding, self).__init__(
- 'utf16',
- (0, 255),
- ['non_primary_whitespace',
- 'non_primary_letter',
- 'non_primary_identifier_part_not_letter',
- 'non_primary_line_terminator',
- 'non_primary_everything_else'],
- {
- 'whitespace':
- [(9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
- ('non_primary_whitespace',)],
- 'letter':
- [(65, 90), (97, 122), (170, 170), (181, 181),
- (186, 186), (192, 214), (216, 246), (248, 255),
- ('non_primary_letter',)],
- 'line_terminator':
- [(10, 10), (13, 13), ('non_primary_line_terminator',)],
- 'identifier_part_not_letter':
- [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
- })
-
-class Utf8Encoding(KeyEncoding):
-
- def __init__(self):
- super(Utf8Encoding, self).__init__(
- 'utf8',
- (0, 127),
- ['non_primary_whitespace',
- 'non_primary_letter',
- 'non_primary_identifier_part_not_letter',
- 'non_primary_line_terminator',
- 'non_primary_everything_else'],
- {
- 'whitespace':
- [(9, 9), (11, 12), (32, 32), ('non_primary_whitespace',)],
- 'letter':
- [(65, 90), (97, 122), ('non_primary_letter',)],
- 'line_terminator':
- [(10, 10), (13, 13), ('non_primary_line_terminator',)],
- 'identifier_part_not_letter':
- [(48, 57), (95, 95), ('non_primary_identifier_part_not_letter',)],
- })
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.