Reviewers: ,
Message:
Committed patchset #1 manually as r17446.
Description:
Experimental parser: better disjoint keys
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=17446
Please review this at https://codereview.chromium.org/54183004/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+38, -18 lines):
M tools/lexer_generator/automata_test.py
M tools/lexer_generator/transition_keys.py
Index: tools/lexer_generator/automata_test.py
diff --git a/tools/lexer_generator/automata_test.py
b/tools/lexer_generator/automata_test.py
index
ca4abe37f02d5de3486e9247e8c75bad16b94aee..abe33e8731a473cd5a519bf049bbf9c7deb23680
100644
--- a/tools/lexer_generator/automata_test.py
+++ b/tools/lexer_generator/automata_test.py
@@ -52,8 +52,8 @@ class AutomataTestCase(unittest.TestCase):
(".", ["a", "b"], ["", "aa"]),
(".*", ["", "a", "abcaabbcc"], []),
("a.b", ["aab", "abb", "acb"], ["ab", ""]),
- # ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
- # ("a.+b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
+ ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
+ ("a.+b", ["aab", "abb", "acb"], ["aaac", "ab", ""]),
(".|.", ["a", "b"], ["aa", ""]),
]
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py
b/tools/lexer_generator/transition_keys.py
index
f84745de29a424ac6202b1f9a7504c3a9245706d..da7e88be4f67044d51b1c1e72c00b34c405be820
100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -24,6 +24,7 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from string import printable
class TransitionKey:
@@ -48,7 +49,7 @@ class TransitionKey:
def __create(ranges):
TransitionKey.__verify_ranges(ranges)
key = TransitionKey()
- key.__ranges = ranges
+ key.__ranges = tuple(ranges) # immutable
key.__cached_hash = None
return key
@@ -97,6 +98,7 @@ class TransitionKey:
subkey = key.__ranges[0]
for k in self.__ranges:
if k[0] <= subkey[0] and k[1] >= subkey[1]: return True
+ # TODO assert disjoint
return False
def __hash__(self):
@@ -109,20 +111,25 @@ class TransitionKey:
def __eq__(self, other):
return isinstance(other, self.__class__) and self.__ranges ==
other.__ranges
+ __printable_cache = {}
+
@staticmethod
def __print_range(r):
def to_str(x):
if x <= TransitionKey.__latin_1_upper_bound:
- return chr(x)
+ if not x in TransitionKey.__printable_cache:
+ res = "'%s'" % chr(x) if chr(x) in printable else str(x)
+ TransitionKey.__printable_cache[x] = res
+ return TransitionKey.__printable_cache[x]
if x == TransitionKey.__unicode_literal_bounds[0]:
return "literal"
if x == TransitionKey.__unicode_whitespace_bounds[0]:
return "whitespace"
assert False
if r[0] == r[1]:
- return "'%s'" % to_str(r[0])
+ return "%s" % to_str(r[0])
else:
- return "['%s'-'%s']" % (to_str(r[0]), to_str(r[1]))
+ return "[%s-%s]" % (to_str(r[0]), to_str(r[1]))
def __str__(self):
if self == self.epsilon():
@@ -133,17 +140,30 @@ class TransitionKey:
@staticmethod
def disjoint_keys(key_set):
- ranges = sorted(reduce(lambda acc, x : acc + x.__ranges, key_set, []))
- for i in range(0, len(ranges)):
- right = ranges[i][1]
- limit = i
- for j in range(i + 1, len(ranges)):
- if ranges[j][0] > right:
- break
- assert ranges[j][0] >= ranges[i][0]
- limit = j
- if limit == i: continue
- for j in range(i + 1, limit + 1):
- ranges[j] = (right, ranges[j][1])
+ range_map = {}
+ for x in key_set:
+ for r in x.__ranges:
+ if not r[0] in range_map:
+ range_map[r[0]] = []
+ range_map[r[0]].append(r[1])
+ sort = lambda x : sorted(set(x))
+ range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items()))
+ ranges = []
+ upper_bound = TransitionKey.__upper_bound + 1
+ for i in range(len(range_map)):
+ (left, left_values) = range_map[i]
+ next = range_map[i + 1][0] if i != len(range_map) - 1 else
upper_bound
+ to_push = []
+ for v in left_values:
+ if v >= next:
+ if not to_push:
+ ranges.append((left, next - 1))
+ to_push.append(v)
+ else:
+ ranges.append((left, v))
+ left = v + 1
+ if to_push:
+ current = range_map[i + 1]
+ range_map[i + 1] = (current[0], sort(current[1] + to_push))
TransitionKey.__verify_ranges(ranges)
return map(lambda x : TransitionKey.__create([x]), ranges)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.