Reviewers: ,
Message:
Committed patchset #3 manually as r17882.
Description:
Experimental lexer generator: refactor(TransitionKey) += comments.
- Add __init__
- Add comments
- Rename key_matches -> is_superset_of
- reduce -> max
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=17882
Please review this at https://codereview.chromium.org/63943005/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+55, -22 lines):
M tools/lexer_generator/dfa.py
M tools/lexer_generator/nfa.py
M tools/lexer_generator/transition_keys.py
Index: tools/lexer_generator/dfa.py
diff --git a/tools/lexer_generator/dfa.py b/tools/lexer_generator/dfa.py
index
d648cf6343977379331f05d7bd47ef4039abd4a2..d191e0518550f014d15076cf259b102dfd77786b
100644
--- a/tools/lexer_generator/dfa.py
+++ b/tools/lexer_generator/dfa.py
@@ -66,7 +66,7 @@ class DfaState(AutomatonState):
return self.__matches(lambda k, v : k.matches_char(v), value)
def key_matches(self, value):
- return self.__matches(lambda k, v : k.matches_key(v), value)
+ return self.__matches(lambda k, v : k.is_superset_of_key(v), value)
class Dfa(Automaton):
Index: tools/lexer_generator/nfa.py
diff --git a/tools/lexer_generator/nfa.py b/tools/lexer_generator/nfa.py
index
73ff0ceb4f6bdcf05a7dda4adf2910138d6f8c0b..24870e5e9036c90b8264b28526842bfa5a5b867f
100644
--- a/tools/lexer_generator/nfa.py
+++ b/tools/lexer_generator/nfa.py
@@ -105,7 +105,7 @@ class NfaState(AutomatonState):
return self.__matches(lambda k, v : k.matches_char(v), value)
def key_matches(self, value):
- return self.__matches(lambda k, v : k.matches_key(v), value)
+ return self.__matches(lambda k, v : k.is_superset_of_key(v), value)
class Nfa(Automaton):
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py
b/tools/lexer_generator/transition_keys.py
index
98332ec846573ddd05dd6dc7ce19963da01895e1..d0eb5b1f16d069ebe5862b749aa907f41ab02ba0
100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -28,6 +28,13 @@
from string import printable
class TransitionKey:
+ '''Represents a transition from a state in DFA or NFA to another state.
+
+ A transition key has a list of character ranges and a list of class
ranges
+ (e.g., "whitespace"), defining for which characters the transition
+ happens. When we generate code based on the transition key, the character
+ ranges generate simple checks and the class ranges generate more
complicated
+ conditions, e.g., function calls.'''
__class_bounds = {
'latin_1' : (1, 255),
@@ -39,7 +46,7 @@ class TransitionKey:
'zero' : (259, 259),
}
__lower_bound = 1
- __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]),
__class_bounds.items(), 0)
+ __upper_bound = max(__class_bounds.values(), key=lambda item: item[1])[1]
__cached_keys = {}
@@ -78,19 +85,6 @@ class TransitionKey:
assert r_is_class
last = r
- @staticmethod
- def __create(ranges, name = None):
- if not ranges:
- assert name == 'epsilon'
- assert not name in TransitionKey.__cached_keys
- else:
- TransitionKey.__verify_ranges(ranges, True)
- key = TransitionKey()
- key.__ranges = tuple(ranges) # immutable
- key.__name = name
- key.__cached_hash = None
- return key
-
def __is_unique(self):
return len(self.__ranges) == 1 and
self.__is_unique_range(self.__ranges[0])
@@ -98,15 +92,17 @@ class TransitionKey:
def __cached_key(name, bounds_getter):
if not name in TransitionKey.__cached_keys:
bounds = bounds_getter(name)
- TransitionKey.__cached_keys[name] = TransitionKey.__create(bounds,
name)
+ TransitionKey.__cached_keys[name] = TransitionKey(bounds, name)
return TransitionKey.__cached_keys[name]
@staticmethod
def epsilon():
+ '''Returns a TransitionKey for the epsilon (empty) transition.'''
return TransitionKey.__cached_key('epsilon', lambda name : [])
@staticmethod
def any():
+ '''Returns a TransitionKey which matches any character.'''
def bounds_getter(name):
bounds = TransitionKey.__class_bounds.values()
bounds.sort()
@@ -115,12 +111,17 @@ class TransitionKey:
@staticmethod
def single_char(char):
+ '''Returns a TransitionKey for a single-character transition.'''
char = ord(char)
assert TransitionKey.__in_latin_1(char)
- return TransitionKey.__create([(char, char)])
+ return TransitionKey([(char, char)])
@staticmethod
def unique(name):
+ '''Returns a unique TransitionKey for the given name (and creates it
if it
+ doesn't exist yet). The TransitionKey won't have any real character
range,
+ but a newly-created "mock" character range which is separate from all
other
+ character ranges.'''
def get_bounds(name):
bound = TransitionKey.__unique_key_counter
TransitionKey.__unique_key_counter -= 1
@@ -157,6 +158,13 @@ class TransitionKey:
@staticmethod
def character_class(graph, key_map):
+ '''Processes 'graph' (a representation of a character class in the rule
+ file), and constructs a TransitionKey based on it. 'key_map' contains
+ already constructed aliases for character classes (they can be used in
the
+ new character class). It is a map from strings (character class names)
to
+ TransitionKeys. For example, graph might represent the character class
+ [a-z:digit:] where 'digit' is a previously constructed characte class
found
+ in "key_map".'''
ranges = []
assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS'
invert = graph[0] == 'NOT_CLASS'
@@ -170,7 +178,8 @@ class TransitionKey:
if r[0] <= char and char <= r[1]: return True
return False
- def matches_key(self, key):
+ def is_superset_of_key(self, key):
+ '''Returns true if 'key' is a sub-key of this TransitionKey.'''
assert isinstance(key, self.__class__)
assert key != TransitionKey.epsilon() and not key.__is_unique()
assert len(key.__ranges) == 1
@@ -247,6 +256,16 @@ class TransitionKey:
else:
return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))
+ def __init__(self, ranges, name = None):
+ if not ranges:
+ assert name == 'epsilon'
+ assert not name in TransitionKey.__cached_keys
+ else:
+ TransitionKey.__verify_ranges(ranges, True)
+ self.__name = name
+ self.__ranges = tuple(ranges) # immutable
+ self.__cached_hash = None
+
def __str__(self):
if self.__name:
return self.__name
@@ -254,6 +273,9 @@ class TransitionKey:
@staticmethod
def __disjoint_keys(range_map):
+ '''Takes a set of possibly overlapping ranges, returns a list of ranges
+ which don't overlap and which cover the same points as the original
+ set. range_map is a map from lower bounds to a list of upper bounds.'''
sort = lambda x : sorted(set(x))
range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items()))
ranges = []
@@ -294,16 +316,24 @@ class TransitionKey:
@staticmethod
def disjoint_keys(key_set):
+ '''Takes a set of possibly overlapping TransitionKeys, returns a list
of
+ TransitionKeys which don't overlap and whose union is the same as the
union
+ of the original key_set. key_set is a set of TransitionKeys.'''
ranges = TransitionKey.__disjoint_ranges_from_key_set(key_set)
- return map(lambda x : TransitionKey.__create([x]), ranges)
+ return map(lambda x : TransitionKey([x]), ranges)
@staticmethod
def inverse_key(key_set):
+ '''Returns a TransitionKey which matches represents the inverse of the
union
+ of 'key_set'. The TransitionKeys contain a set of character ranges and
a set
+ of classes. The character ranges are inverted in relation to the
latin_1
+ character range, and the character classes are inverted in relation to
all
+ character classes in __class_bounds.'''
ranges = TransitionKey.__disjoint_ranges_from_key_set(key_set)
inverse = TransitionKey.__invert_ranges(ranges)
if not inverse:
return None
- return TransitionKey.__create(inverse)
+ return TransitionKey(inverse)
@staticmethod
def __key_from_ranges(invert, ranges):
@@ -316,7 +346,7 @@ class TransitionKey:
ranges = TransitionKey.__merge_ranges(ranges)
if invert:
ranges = TransitionKey.__invert_ranges(ranges)
- return TransitionKey.__create(ranges)
+ return TransitionKey(ranges)
@staticmethod
def __merge_ranges(ranges):
@@ -344,6 +374,9 @@ class TransitionKey:
def __invert_ranges(ranges):
inverted = []
last = None
+ # Extract character classes (as opposed to character ranges) from
+ # __class_bounds. Since latin_1 is the only real character range, we
can do
+ # this.
classes = set(TransitionKey.__class_bounds.values())
latin_1 = TransitionKey.__class_bounds['latin_1']
classes.remove(latin_1)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.