Reviewers: ,

Message:
Committed patchset #1 manually as r17446.

Description:
Experimental parser: better disjoint keys

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=17446

Please review this at https://codereview.chromium.org/54183004/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+38, -18 lines):
  M tools/lexer_generator/automata_test.py
  M tools/lexer_generator/transition_keys.py


Index: tools/lexer_generator/automata_test.py
diff --git a/tools/lexer_generator/automata_test.py b/tools/lexer_generator/automata_test.py index ca4abe37f02d5de3486e9247e8c75bad16b94aee..abe33e8731a473cd5a519bf049bbf9c7deb23680 100644
--- a/tools/lexer_generator/automata_test.py
+++ b/tools/lexer_generator/automata_test.py
@@ -52,8 +52,8 @@ class AutomataTestCase(unittest.TestCase):
       (".", ["a", "b"], ["", "aa"]),
       (".*", ["", "a", "abcaabbcc"], []),
       ("a.b", ["aab", "abb", "acb"], ["ab", ""]),
-      # ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
-      # ("a.+b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
+      ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
+      ("a.+b", ["aab", "abb", "acb"], ["aaac", "ab", ""]),
       (".|.", ["a", "b"], ["aa", ""]),
     ]

Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py index f84745de29a424ac6202b1f9a7504c3a9245706d..da7e88be4f67044d51b1c1e72c00b34c405be820 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -24,6 +24,7 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from string import printable

 class TransitionKey:

@@ -48,7 +49,7 @@ class TransitionKey:
   def __create(ranges):
     TransitionKey.__verify_ranges(ranges)
     key = TransitionKey()
-    key.__ranges = ranges
+    key.__ranges = tuple(ranges) # immutable
     key.__cached_hash = None
     return key

@@ -97,6 +98,7 @@ class TransitionKey:
     subkey = key.__ranges[0]
     for k in self.__ranges:
       if k[0] <= subkey[0] and k[1] >= subkey[1]: return True
+    # TODO assert disjoint
     return False

   def __hash__(self):
@@ -109,20 +111,25 @@ class TransitionKey:
   def __eq__(self, other):
return isinstance(other, self.__class__) and self.__ranges == other.__ranges

+  __printable_cache = {}
+
   @staticmethod
   def __print_range(r):
     def to_str(x):
       if x <= TransitionKey.__latin_1_upper_bound:
-        return chr(x)
+        if not x in TransitionKey.__printable_cache:
+          res = "'%s'" % chr(x) if chr(x) in printable else str(x)
+          TransitionKey.__printable_cache[x] = res
+        return TransitionKey.__printable_cache[x]
       if x == TransitionKey.__unicode_literal_bounds[0]:
         return "literal"
       if x == TransitionKey.__unicode_whitespace_bounds[0]:
         return "whitespace"
       assert False
     if r[0] == r[1]:
-      return "'%s'" % to_str(r[0])
+      return "%s" % to_str(r[0])
     else:
-      return "['%s'-'%s']" % (to_str(r[0]), to_str(r[1]))
+      return "[%s-%s]" % (to_str(r[0]), to_str(r[1]))

   def __str__(self):
     if self == self.epsilon():
@@ -133,17 +140,30 @@ class TransitionKey:

   @staticmethod
   def disjoint_keys(key_set):
-    ranges = sorted(reduce(lambda acc, x : acc + x.__ranges, key_set, []))
-    for i in range(0, len(ranges)):
-      right = ranges[i][1]
-      limit = i
-      for j in range(i + 1, len(ranges)):
-        if ranges[j][0] > right:
-          break
-        assert ranges[j][0] >= ranges[i][0]
-        limit = j
-      if limit == i: continue
-      for j in range(i + 1, limit + 1):
-        ranges[j] = (right, ranges[j][1])
+    range_map = {}
+    for x in key_set:
+      for r in x.__ranges:
+        if not r[0] in range_map:
+          range_map[r[0]] = []
+        range_map[r[0]].append(r[1])
+    sort = lambda x : sorted(set(x))
+    range_map = sorted(map(lambda (k, v): (k, sort(v)), range_map.items()))
+    ranges = []
+    upper_bound = TransitionKey.__upper_bound + 1
+    for i in range(len(range_map)):
+      (left, left_values) = range_map[i]
+ next = range_map[i + 1][0] if i != len(range_map) - 1 else upper_bound
+      to_push = []
+      for v in left_values:
+        if v >= next:
+          if not to_push:
+            ranges.append((left, next - 1))
+          to_push.append(v)
+        else:
+          ranges.append((left, v))
+          left = v + 1
+      if to_push:
+        current = range_map[i + 1]
+        range_map[i + 1] = (current[0], sort(current[1] + to_push))
     TransitionKey.__verify_ranges(ranges)
     return map(lambda x : TransitionKey.__create([x]), ranges)


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to