Revision: 17591
Author:   [email protected]
Date:     Fri Nov  8 12:36:06 2013 UTC
Log: Experimental lexer generator: Refactoring, merge Lexer and Generator.

[email protected]
[email protected]
BUG=

Review URL: https://codereview.chromium.org/66613002
http://code.google.com/p/v8/source/detail?r=17591

Deleted:
 /branches/experimental/parser/tools/lexer_generator/lexer.py
Modified:
 /branches/experimental/parser/tools/lexer_generator/generator.py
 /branches/experimental/parser/tools/lexer_generator/lexer_test.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer.py Thu Nov 7 15:42:57 2013 UTC
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright 2013 the V8 project authors. All rights reserved.
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above
-#       copyright notice, this list of conditions and the following
-#       disclaimer in the documentation and/or other materials provided
-#       with the distribution.
-#     * Neither the name of Google Inc. nor the names of its
-#       contributors may be used to endorse or promote products derived
-#       from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import argparse
-from nfa import Nfa, NfaBuilder
-from dfa import Dfa
-from rule_parser import RuleParser, RuleParserState
-
-# FIXME: We need to move this to a common place!
-def process_rules(parser_state):
-  dfas = {}
-  builder = NfaBuilder()
-  builder.set_character_classes(parser_state.character_classes)
-  for k, v in parser_state.rules.items():
-    graphs = []
-    for (graph, action) in v['regex']:
-      graphs.append(NfaBuilder.add_action(graph, action))
-    nfa = builder.nfa(NfaBuilder.or_graphs(graphs))
-    (start_name, dfa_nodes) = nfa.compute_dfa()
-    dfas[k] = Dfa(start_name, dfa_nodes)
-  return dfas
-
-# Lexes strings with the help of DFAs produced by the grammar. For sanity
-# checking the automata.
-class Lexer(object):
-
-  def __init__(self, rules):
-    parser_state = RuleParserState()
-    RuleParser.parse(rules, parser_state)
-    self.dfas = process_rules(parser_state)
-
-  def lex(self, string):
-    dfa = self.dfas['default'] # FIXME
-
-    action_stream = []
-    terminate_seen = False
-    offset = 0
-    while not terminate_seen and string:
-      result = list(dfa.lex(string))
-      last_position = 0
-      for (action, position) in result:
-        action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))
-        last_position = position
-        if action[2] == 'terminate':
-          terminate_seen = True
-      string = string[(last_position + 1):]
-      offset += last_position
-    return action_stream
-
-if __name__ == '__main__':
-
-  parser = argparse.ArgumentParser()
-  parser.add_argument('--rules')
-  parser.add_argument('--input')
-  args = parser.parse_args()
-
-  re_file = args.rules
-  input_file = args.input
-
-  with open(re_file, 'r') as f:
-    rules = f.read()
-  with open(input_file, 'r') as f:
-    input_text = f.read() + '\0'
-
-  lexer = Lexer(rules)
-  for t in lexer.lex(input_text):
-    print t
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri Nov 8 11:52:04 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/generator.py Fri Nov 8 12:36:06 2013 UTC
@@ -62,83 +62,117 @@
 %s
     </script>'''

-def generate_html(data):
-  scripts = []
-  loads = []
-  for i, (name, nfa, dfa) in enumerate(data):
-    if name == 'Normal': continue
-    (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
-    scripts.append(script_template % (nfa_i, nfa.to_dot()))
-    scripts.append(script_template % (dfa_i, dfa.to_dot()))
-    loads.append(load_template % ("nfa [%s]" % name, nfa_i))
-    loads.append(load_template % ("dfa [%s]" % name, dfa_i))
-  body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
-  return file_template % body
+class Generator(object):

-def process_rules(parser_state):
-  rule_map = {}
-  builder = NfaBuilder()
-  builder.set_character_classes(parser_state.character_classes)
-  assert 'default' in parser_state.rules
-  def process(k, v):
-    assert 'default' in v
-    graphs = []
-    for (graph, action) in v['regex']:
-      (precedence, code, transition) = action
-      if code:
-        graph = NfaBuilder.add_action(graph, (precedence, code, None))
-      if transition == 'continue':
-        if not v['default'][1][2] == 'continue':
-          graph = NfaBuilder.add_continue(graph)
+  def __init__(self, rules):
+    parser_state = RuleParserState()
+    RuleParser.parse(rules, parser_state)
+    self.__automata = {}
+    self.process_rules(parser_state)
+
+  def generate_html(self):
+    scripts = []
+    loads = []
+    for i, name in enumerate(self.__automata):
+      (nfa, dfa) = self.__automata[name]
+      if name == 'Normal': continue
+      (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
+      scripts.append(script_template % (nfa_i, nfa.to_dot()))
+      scripts.append(script_template % (dfa_i, dfa.to_dot()))
+      loads.append(load_template % ("nfa [%s]" % name, nfa_i))
+      loads.append(load_template % ("dfa [%s]" % name, dfa_i))
+      body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
+      return file_template % body
+
+  def process_rules(self, parser_state):
+    rule_map = {}
+    builder = NfaBuilder()
+    builder.set_character_classes(parser_state.character_classes)
+    assert 'default' in parser_state.rules
+    def process(k, v):
+      assert 'default' in v
+      graphs = []
+      for (graph, action) in v['regex']:
+        (precedence, code, transition) = action
+        if code:
+          graph = NfaBuilder.add_action(graph, (precedence, code, None))
+        if transition == 'continue':
+          if not v['default'][1][2] == 'continue':
+            graph = NfaBuilder.add_continue(graph)
+          else:
+            pass # TODO null key
+        elif (transition == 'break' or
+              transition == 'terminate' or
+              transition == 'terminate_illegal'):
+          graph = NfaBuilder.add_action(graph, (10000, transition, None))
         else:
-          pass # TODO null key
-      elif (transition == 'break' or
-            transition == 'terminate' or
-            transition == 'terminate_illegal'):
-        graph = NfaBuilder.add_action(graph, (10000, transition, None))
-      else:
-        assert k == 'default'
-        graph = NfaBuilder.join_subgraph(graph, transition, rule_map[transition])
-      graphs.append(graph)
-    graph = NfaBuilder.or_graphs(graphs)
-    # merge default action
-    (precedence, code, transition) = v['default'][1]
-    assert transition == 'continue' or transition == 'break'
-    if transition == 'continue':
-      assert k != 'default'
-      graph = NfaBuilder.add_incoming_action(graph, (10000, k, None))
-    if code:
-      graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None))
-    rule_map[k] = graph
-  for k, v in parser_state.rules.items():
-    if k == 'default': continue
-    process(k, v)
-  process('default', parser_state.rules['default'])
-  html_data = []
-  for rule_name, graph in rule_map.items():
-    nfa = builder.nfa(graph)
-    (start, dfa_nodes) = nfa.compute_dfa()
-    dfa = Dfa(start, dfa_nodes)
-    html_data.append((rule_name, nfa, dfa))
-  return html_data
+          assert k == 'default'
+          graph = NfaBuilder.join_subgraph(graph, transition, rule_map[transition])
+        graphs.append(graph)
+      graph = NfaBuilder.or_graphs(graphs)
+      # merge default action
+      (precedence, code, transition) = v['default'][1]
+      assert transition == 'continue' or transition == 'break'
+      if transition == 'continue':
+        assert k != 'default'
+        graph = NfaBuilder.add_incoming_action(graph, (10000, k, None))
+      if code:
+        graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None))
+      rule_map[k] = graph
+    for k, v in parser_state.rules.items():
+      if k == 'default': continue
+      process(k, v)
+    process('default', parser_state.rules['default'])
+    for rule_name, graph in rule_map.items():
+      nfa = builder.nfa(graph)
+      (start, dfa_nodes) = nfa.compute_dfa()
+      dfa = Dfa(start, dfa_nodes)
+      self.__automata[rule_name] = (nfa, dfa)
+
+  # Lexes strings with the help of DFAs produced by the grammar. For sanity
+  # checking the automata.
+  def lex(self, string):
+    (nfa, dfa) = self.__automata['default'] # FIXME
+
+    action_stream = []
+    terminate_seen = False
+    offset = 0
+    while not terminate_seen and string:
+      result = list(dfa.lex(string))
+      last_position = 0
+      for (action, position) in result:
+        action_stream.append((action[1], action[2], last_position + offset, position + 1 + offset, string[last_position:(position + 1)]))
+        last_position = position
+        if action[2] == 'terminate':
+          terminate_seen = True
+      string = string[(last_position + 1):]
+      offset += last_position
+    return action_stream

 if __name__ == '__main__':

   parser = argparse.ArgumentParser()
   parser.add_argument('--html')
   parser.add_argument('--re', default='src/lexer/lexer_py.re')
+  parser.add_argument('--input')
   args = parser.parse_args()

   re_file = args.re
   parser_state = RuleParserState()
   print "parsing %s" % re_file
   with open(re_file, 'r') as f:
-    RuleParser.parse(f.read(), parser_state)
-  html_data = process_rules(parser_state)
+    generator = Generator(f.read())

   html_file = args.html
   if html_file:
-    html = generate_html(html_data)
+    html = generator.generate_html()
     with open(args.html, 'w') as f:
       f.write(html)
       print "wrote html to %s" % html_file
+
+  input_file = args.input
+  if input_file:
+    with open(input_file, 'r') as f:
+      input_text = f.read() + '\0'
+    for t in generator.lex(input_text):
+      print t
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Fri Nov 8 10:28:35 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Fri Nov 8 12:36:06 2013 UTC
@@ -26,7 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import unittest
-from lexer import Lexer
+from generator import Generator

 class LexerTestCase(unittest.TestCase):

@@ -38,32 +38,34 @@
   def test_simple(self):
     rules = '''
     <default>
-    "("           { LBRACE }
-    ")"           { RBRACE }
+    "("           { LBRACE } <<continue>>
+    ")"           { RBRACE } <<continue>>

-    "foo"         { FOO }
-    eof           <<terminate>>'''
+    "foo"         { FOO } <<continue>>
+    eof           <<terminate>>
+    default       <<break>>'''

-    lexer = Lexer(rules)
+    generator = Generator(rules)

     string = 'foo()\0'
     self.__verify_action_stream(
-        lexer.lex(string),
+        generator.lex(string),
         [('FOO', 'foo'), ('LBRACE', '('), ('RBRACE', ')')])

   def test_maximal_matching(self):
     rules = '''
     <default>
-    "<"           { LT }
-    "<<"          { SHL }
-    " "           { SPACE }
-    eof           <<terminate>>'''
+    "<"           { LT } <<continue>>
+    "<<"          { SHL } <<continue>>
+    " "           { SPACE } <<continue>>
+    eof           <<terminate>>
+    default       <<break>>'''

-    lexer = Lexer(rules)
+    generator = Generator(rules)

     string = '<< <\0'
     self.__verify_action_stream(
-        lexer.lex(string),
+        generator.lex(string),
         [('SHL', '<<'), ('SPACE', ' '), ('LT', '<')])


@@ -75,12 +77,13 @@
     digit = [0-9];
     number = (digit+ ("." digit+)?);
     <default>
-    number        { NUMBER }
-    eof           <<terminate>>'''
+    number        { NUMBER } <<continue>>
+    eof           <<terminate>>
+    default       <<break>>'''

-    lexer = Lexer(rules)
+    generator = Generator(rules)

     string = '555\0'
     self.__verify_action_stream(
-        lexer.lex(string),
+        generator.lex(string),
         [('NUMBER', '555')])

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to