Reviewers: marja,

Message:
Committed patchset #1 manually as r18729.

Description:
Experimental parser: handling unique keys in merges

[email protected]

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=18729

Please review this at https://codereview.chromium.org/132693018/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+35, -27 lines):
  M tools/lexer_generator/nfa_builder.py
  M tools/lexer_generator/rule_parser.py
  M tools/lexer_generator/transition_keys.py


Index: tools/lexer_generator/nfa_builder.py
diff --git a/tools/lexer_generator/nfa_builder.py b/tools/lexer_generator/nfa_builder.py index 898e29692a687fd4ba32ae13b7fa7c8a54115524..5ff7e242b90cd08b944c5ecacee3a57695be3159 100644
--- a/tools/lexer_generator/nfa_builder.py
+++ b/tools/lexer_generator/nfa_builder.py
@@ -150,8 +150,8 @@ class NfaBuilder(object):
     self.__patch_ends(ends, state['start_node'])
     return (start, [])

-  def __catch_all(self, graph):
-    return self.__key_state(TransitionKey.unique('catch_all'))
+  def __unique_key(self, graph):
+    return self.__key_state(TransitionKey.unique(graph[1]))

   def __join(self, graph):
     (graph, name, subgraph) = graph[1:]
@@ -311,8 +311,8 @@ class NfaBuilder(object):
     return ('CONTINUE', graph)

   @staticmethod
-  def catch_all():
-    return ('CATCH_ALL',)
+  def unique_key(name):
+    return ('UNIQUE_KEY', name)

   @staticmethod
   def join_subgraph(graph, name, subgraph):
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py index 29cb0ee34f1b771fa9a6aa19281fa1fde9cad2a8..061c78be8390735db86a4b2f186e5b445e9f2ea5 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -87,7 +87,7 @@ class RuleParser:
     if not state.current_state in state.rules:
       state.rules[state.current_state] = {
         'default_action': None,
-        'catch_all' : None,
+        'uniques' : {},
         'regex' : []
       }
     p[0] = state.current_state
@@ -113,8 +113,9 @@ class RuleParser:
       assert not rules['default_action']
       rules['default_action'] = action
     elif p[1] == 'catch_all':
-      assert not rules['catch_all']
-      rules['catch_all'] = (precedence, action)
+      assert p[1] not in rules['uniques']
+      rules['uniques'][p[1]] = True
+ rules['regex'].append((NfaBuilder.unique_key(p[1]), precedence, action))
     else:
       regex = p[1]
       rules['regex'].append((regex, precedence, action))
@@ -317,10 +318,6 @@ class RuleProcessor(object):
           graph = NfaBuilder.join_subgraph(
             graph, transition, rule_map[transition])
         graphs.append(graph)
-      if v['catch_all']:
-        (precedence, catch_all) = v['catch_all']
-        assert catch_all == (None, None, 'continue'), "unimplemented"
-        graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))
       graph = NfaBuilder.or_graphs(graphs)
       rule_map[subgraph] = graph
     # process first the subgraphs, then the default graph
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py index 857e8629e2560cbdc551b84b43aad4d577be152d..fbea55620bd26b496d3de7204d6f8b51f35683b4 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -126,19 +126,17 @@ class TransitionKey(object):
   @staticmethod
   def __verify_ranges(encoding, ranges, check_merged):
     assert ranges
-    if len(ranges) == 1 and TransitionKey.__is_unique_range(ranges[0]):
-      return
     last = None
     for r in ranges:
-      assert not TransitionKey.__is_unique_range(r)
-      r_is_class = encoding.is_class_range(r)
+      r_is_class = (TransitionKey.__is_unique_range(r) or
+                    encoding.is_class_range(r))
       # Assert that the ranges are in order.
       if last != None and check_merged:
         assert last[1] + 1 < r[0] or r_is_class
-      last = r
-
-  def __is_unique(self):
- return len(self.__ranges) == 1 and self.__is_unique_range(self.__ranges[0])
+      if r_is_class:
+        last = None
+      else:
+        last = r

   @staticmethod
   def __cached_key(encoding, name, bounds_getter):
@@ -237,7 +235,7 @@ class TransitionKey(object):
   def is_superset_of_key(self, key):
     '''Returns true if 'key' is a sub-key of this TransitionKey.'''
     assert isinstance(key, self.__class__)
-    assert key != TransitionKey.epsilon() and not key.__is_unique()
+    assert key != TransitionKey.epsilon()
     assert len(key.__ranges) == 1
     subkey = key.__ranges[0]
     matches = False
@@ -278,14 +276,24 @@ class TransitionKey(object):
   @staticmethod
   def __class_name(encoding, r):
     for name, v in encoding.class_range_iter():
-      if r == v: return name
+      if r == v:
+        return name
+    assert False
+
+  @staticmethod
+  def __unique_name(r):
+    for name, v in TransitionKey.__cached_keys['no_encoding'].items():
+      if v.__ranges and r == v.__ranges[0]:
+        return name[2:]
     assert False

   def range_iter(self, encoding):
-    assert not self == TransitionKey.epsilon() and not self.__is_unique()
+    assert not self == TransitionKey.epsilon()
     for r in self.__ranges:
-      if encoding.is_class_range(r):
-        yield ('CLASS', TransitionKey.__class_name(encoding, r))
+      if self.__is_unique_range(r):
+        yield ('UNIQUE', self.__unique_name(r))
+      elif encoding.is_class_range(r):
+        yield ('CLASS', self.__class_name(encoding, r))
       else:
         yield ('PRIMARY_RANGE', r)

@@ -297,6 +305,8 @@ class TransitionKey(object):

   @staticmethod
   def __range_str(encoding, r):
+    if TransitionKey.__is_unique_range(r):
+      return TransitionKey.__unique_name(r)
     if encoding and encoding.is_class_range(r):
       return TransitionKey.__class_name(encoding, r)
     def to_str(x):
@@ -364,7 +374,6 @@ class TransitionKey(object):
       return []
     range_map = {}
     for x in key_set:
-      assert not x.__is_unique()
       assert x != TransitionKey.epsilon()
       for r in x.__ranges:
         if not r[0] in range_map:
@@ -404,6 +413,7 @@ class TransitionKey(object):
   def __key_from_ranges(encoding, invert, ranges):
     range_map = {}
     for r in ranges:
+      assert r
       if not r[0] in range_map:
         range_map[r[0]] = []
       range_map[r[0]].append(r[1])
@@ -418,10 +428,11 @@ class TransitionKey(object):
     merged = []
     last = None
     for r in ranges:
-      assert not TransitionKey.__is_unique_range(r)
       if last == None:
         last = r
-      elif (last[1] + 1 == r[0] and not encoding.is_class_range(r)):
+      elif (last[1] + 1 == r[0] and
+            not TransitionKey.__is_unique_range(r) and
+            not encoding.is_class_range(r)):
         last = (last[0], r[1])
       else:
         merged.append(last)


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to